diff --git a/.agignore b/.agignore
new file mode 100644
index 00000000..1521c8b7
--- /dev/null
+++ b/.agignore
@@ -0,0 +1 @@
+dist
diff --git a/dist/mercury.js b/dist/mercury.js
new file mode 100644
index 00000000..b81e7410
--- /dev/null
+++ b/dist/mercury.js
@@ -0,0 +1,3807 @@
+'use strict';
+
+function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
+
+var URL = _interopDefault(require('url'));
+var babelPolyfill = require('babel-polyfill');
+var cheerio = _interopDefault(require('cheerio'));
+var request = _interopDefault(require('request'));
+var stringDirection = _interopDefault(require('string-direction'));
+var validUrl = _interopDefault(require('valid-url'));
+var moment = _interopDefault(require('moment'));
+var wuzzy = _interopDefault(require('wuzzy'));
+var difflib = _interopDefault(require('difflib'));
+var ellipsize = _interopDefault(require('ellipsize'));
+
+var _marked = [range].map(regeneratorRuntime.mark);
+
+function range() {
+ var start = arguments.length <= 0 || arguments[0] === undefined ? 1 : arguments[0];
+ var end = arguments.length <= 1 || arguments[1] === undefined ? 1 : arguments[1];
+ return regeneratorRuntime.wrap(function range$(_context) {
+ while (1) {
+ switch (_context.prev = _context.next) {
+ case 0:
+ if (!(start <= end)) {
+ _context.next = 5;
+ break;
+ }
+
+ _context.next = 3;
+ return start += 1;
+
+ case 3:
+ _context.next = 0;
+ break;
+
+ case 5:
+ case "end":
+ return _context.stop();
+ }
+ }
+ }, _marked[0], this);
+}
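+
+// Note: as compiled, `start` is incremented before each yield, so e.g.
+//   [...range(1, 3)]; // => [2, 3, 4]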
+
+// extremely simple url validation as a first step
+function validateUrl(_ref) {
+ var hostname = _ref.hostname;
+
+  // If there's no hostname, this isn't a valid url
+ return !!hostname;
+}
+
+var Errors = {
+ badUrl: {
+ error: true,
+ messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.'
+ }
+};
+
+var REQUEST_HEADERS = {
+ 'User-Agent': 'Readability - http://readability.com/about/'
+};
+
+// The number of milliseconds to attempt to fetch a resource before timing out.
+var FETCH_TIMEOUT = 10000;
+
+// Content types that we do not extract content from
+var BAD_CONTENT_TYPES = ['audio/mpeg', 'image/gif', 'image/jpeg', 'image/jpg'];
+
+var BAD_CONTENT_TYPES_RE = new RegExp('^(' + BAD_CONTENT_TYPES.join('|') + ')$', 'i');
+
+// Use this setting as the maximum size an article can be
+// for us to attempt parsing. Defaults to 5 MB.
+var MAX_CONTENT_LENGTH = 5242880;
+
+var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) {
+ return typeof obj;
+} : function (obj) {
+ return obj && typeof Symbol === "function" && obj.constructor === Symbol ? "symbol" : typeof obj;
+};
+
+var asyncToGenerator = function (fn) {
+ return function () {
+ var gen = fn.apply(this, arguments);
+ return new Promise(function (resolve, reject) {
+ function step(key, arg) {
+ try {
+ var info = gen[key](arg);
+ var value = info.value;
+ } catch (error) {
+ reject(error);
+ return;
+ }
+
+ if (info.done) {
+ resolve(value);
+ } else {
+ return Promise.resolve(value).then(function (value) {
+ return step("next", value);
+ }, function (err) {
+ return step("throw", err);
+ });
+ }
+ }
+
+ return step("next");
+ });
+ };
+};
+
+var defineProperty = function (obj, key, value) {
+ if (key in obj) {
+ Object.defineProperty(obj, key, {
+ value: value,
+ enumerable: true,
+ configurable: true,
+ writable: true
+ });
+ } else {
+ obj[key] = value;
+ }
+
+ return obj;
+};
+
+var _extends = Object.assign || function (target) {
+ for (var i = 1; i < arguments.length; i++) {
+ var source = arguments[i];
+
+ for (var key in source) {
+ if (Object.prototype.hasOwnProperty.call(source, key)) {
+ target[key] = source[key];
+ }
+ }
+ }
+
+ return target;
+};
+
+var slicedToArray = function () {
+ function sliceIterator(arr, i) {
+ var _arr = [];
+ var _n = true;
+ var _d = false;
+ var _e = undefined;
+
+ try {
+ for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) {
+ _arr.push(_s.value);
+
+ if (i && _arr.length === i) break;
+ }
+ } catch (err) {
+ _d = true;
+ _e = err;
+ } finally {
+ try {
+ if (!_n && _i["return"]) _i["return"]();
+ } finally {
+ if (_d) throw _e;
+ }
+ }
+
+ return _arr;
+ }
+
+ return function (arr, i) {
+ if (Array.isArray(arr)) {
+ return arr;
+ } else if (Symbol.iterator in Object(arr)) {
+ return sliceIterator(arr, i);
+ } else {
+ throw new TypeError("Invalid attempt to destructure non-iterable instance");
+ }
+ };
+}();
+
+function get(options) {
+ return new Promise(function (resolve, reject) {
+ request(options, function (err, response, body) {
+ if (err) {
+ reject(err);
+ } else {
+ resolve({ body: body, response: response });
+ }
+ });
+ });
+}
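+
+// Usage sketch (hypothetical URL):
+//   get({ url: 'http://example.com' })
+//     .then(function (result) { return result.response.statusCode; });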
+
+// Evaluate a response to ensure it's something we should be keeping.
+// This does not validate in the sense of a response being 200 level or
+// not. Validation here means that we haven't found reason to bail from
+// further processing of this url.
+
+function validateResponse(response) {
+ var parseNon2xx = arguments.length <= 1 || arguments[1] === undefined ? false : arguments[1];
+
+ // Check if we got a valid status code
+ if (response.statusMessage !== 'OK') {
+ if (!response.statusCode) {
+ throw new Error('Unable to fetch content. Original exception was ' + response.error);
+ } else if (!parseNon2xx) {
+ throw new Error('Resource returned a response status code of ' + response.statusCode + ' and resource was instructed to reject non-2xx level status codes.');
+ }
+ }
+
+ var _response$headers = response.headers;
+ var contentType = _response$headers['content-type'];
+ var contentLength = _response$headers['content-length'];
+
+ // Check that the content is not in BAD_CONTENT_TYPES
+
+ if (BAD_CONTENT_TYPES_RE.test(contentType)) {
+ throw new Error('Content-type for this resource was ' + contentType + ' and is not allowed.');
+ }
+
+ // Check that the content length is below maximum
+ if (contentLength > MAX_CONTENT_LENGTH) {
+ throw new Error('Content for this resource was too large. Maximum content length is ' + MAX_CONTENT_LENGTH + '.');
+ }
+
+ return true;
+}
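+
+// For example, a JPEG response is rejected here:
+//   validateResponse({
+//     statusMessage: 'OK',
+//     headers: { 'content-type': 'image/jpeg', 'content-length': 100 }
+//   });
+//   // => throws 'Content-type for this resource was image/jpeg and is not allowed.'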
+
+// Set our response attribute to the result of fetching our URL.
+// TODO: This should gracefully handle timeouts and raise the
+// proper exceptions on the many failure cases of HTTP.
+// TODO: Ensure we are not fetching something enormous. Always return
+// unicode content for HTML, with charset conversion.
+
+var fetchResource = (function () {
+ var _ref2 = asyncToGenerator(regeneratorRuntime.mark(function _callee(url, parsedUrl) {
+ var options, _ref3, response, body;
+
+ return regeneratorRuntime.wrap(function _callee$(_context) {
+ while (1) {
+ switch (_context.prev = _context.next) {
+ case 0:
+ parsedUrl = parsedUrl || URL.parse(encodeURI(url));
+
+ options = {
+ url: parsedUrl,
+ headers: _extends({}, REQUEST_HEADERS),
+ timeout: FETCH_TIMEOUT,
+ // Don't set encoding; fixes issues
+ // w/gzipped responses
+ encoding: null,
+ // Accept cookies
+ jar: true,
+ // Accept and decode gzip
+ gzip: true,
+ // Follow any redirect
+ followAllRedirects: true
+ };
+ _context.next = 4;
+ return get(options);
+
+ case 4:
+ _ref3 = _context.sent;
+ response = _ref3.response;
+ body = _ref3.body;
+ _context.prev = 7;
+
+ validateResponse(response);
+ return _context.abrupt('return', { body: body, response: response });
+
+ case 12:
+ _context.prev = 12;
+ _context.t0 = _context['catch'](7);
+ return _context.abrupt('return', Errors.badUrl);
+
+ case 15:
+ case 'end':
+ return _context.stop();
+ }
+ }
+ }, _callee, this, [[7, 12]]);
+ }));
+
+ function fetchResource(_x2, _x3) {
+ return _ref2.apply(this, arguments);
+ }
+
+ return fetchResource;
+})();
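+
+// Usage sketch (hypothetical URL); resolves to { body, response } on
+// success, or to Errors.badUrl if the response fails validation:
+//   fetchResource('http://example.com/article')
+//     .then(function (result) { /* result.body is the raw HTML */ });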
+
+function convertMetaProp($, from, to) {
+ $('meta[' + from + ']').each(function (_, node) {
+ var $node = $(node);
+
+ var value = $node.attr(from);
+ $node.attr(to, value);
+ $node.removeAttr(from);
+ });
+
+ return $;
+}
+
+// For ease of use in extracting from meta tags,
+// replace the "content" attribute on meta tags with the
+// "value" attribute.
+//
+// In addition, normalize 'property' attributes to 'name' for ease of
+// querying later. See, e.g., og or twitter meta tags.
+
+function normalizeMetaTags($) {
+ $ = convertMetaProp($, 'content', 'value');
+ $ = convertMetaProp($, 'property', 'name');
+ return $;
+}
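+
+// e.g. <meta property="og:title" content="Foo" /> becomes
+// <meta name="og:title" value="Foo" />, so every meta tag can be queried
+// uniformly as meta[name="..."] and read via attr('value').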
+
+var IS_LINK = new RegExp('https?://', 'i');
+var IS_IMAGE = new RegExp('\\.(png|gif|jpe?g)', 'i');
+
+var TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');
+
+// Convert all instances of images with potentially
+// lazy loaded images into normal images.
+// Many sites will have img tags with no source, or an image tag with a src
+// attribute that is a placeholder. We need to be able to properly fill in
+// the src attribute so the images are no longer lazy loaded.
+function convertLazyLoadedImages($) {
+ $('img').each(function (_, img) {
+ Reflect.ownKeys(img.attribs).forEach(function (attr) {
+ var value = img.attribs[attr];
+
+ if (attr !== 'src' && IS_LINK.test(value) && IS_IMAGE.test(value)) {
+ $(img).attr('src', value);
+ }
+ });
+ });
+
+ return $;
+}
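+
+// e.g. <img data-src="http://example.com/photo.jpg"> gains
+// src="http://example.com/photo.jpg", because that attribute value looks
+// like both a link and an image path (hypothetical URL).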
+
+function isComment(index, node) {
+ return node.type === 'comment';
+}
+
+function cleanComments($) {
+ $.root().find('*').contents().filter(isComment).remove();
+
+ return $;
+}
+
+function clean($) {
+ $(TAGS_TO_REMOVE).remove();
+
+ $ = cleanComments($);
+ return $;
+}
+
+var Resource = {
+
+ // Create a Resource.
+ //
+ // :param url: The URL for the document we should retrieve.
+ // :param response: If set, use as the response rather than
+ // attempting to fetch it ourselves. Expects a
+ // string.
+ create: function create(url, preparedResponse, parsedUrl) {
+ var _this = this;
+
+ return asyncToGenerator(regeneratorRuntime.mark(function _callee() {
+ var result, validResponse;
+ return regeneratorRuntime.wrap(function _callee$(_context) {
+ while (1) {
+ switch (_context.prev = _context.next) {
+ case 0:
+ result = void 0;
+
+ if (!preparedResponse) {
+ _context.next = 6;
+ break;
+ }
+
+ validResponse = {
+ statusMessage: 'OK',
+ statusCode: 200,
+ headers: {
+ 'content-type': 'text/html',
+ 'content-length': 500
+ }
+ };
+
+
+ result = { body: preparedResponse, response: validResponse };
+ _context.next = 9;
+ break;
+
+ case 6:
+ _context.next = 8;
+ return fetchResource(url, parsedUrl);
+
+ case 8:
+ result = _context.sent;
+
+ case 9:
+ if (!result.error) {
+ _context.next = 11;
+ break;
+ }
+
+ return _context.abrupt('return', result);
+
+ case 11:
+ return _context.abrupt('return', _this.generateDoc(result));
+
+ case 12:
+ case 'end':
+ return _context.stop();
+ }
+ }
+ }, _callee, _this);
+ }))();
+ },
+ generateDoc: function generateDoc(_ref) {
+ var content = _ref.body;
+ var response = _ref.response;
+ var contentType = response.headers['content-type'];
+
+ // TODO: Implement is_text function from
+ // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
+
+ if (!contentType.includes('html') && !contentType.includes('text')) {
+ throw new Error('Content does not appear to be text.');
+ }
+
+ var $ = cheerio.load(content, { normalizeWhitespace: true });
+
+ if ($.root().children().length === 0) {
+ throw new Error('No children, likely a bad parse.');
+ }
+
+ $ = normalizeMetaTags($);
+ $ = convertLazyLoadedImages($);
+ $ = clean($);
+
+ return $;
+ }
+};
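+
+// Usage sketch (hypothetical URL); yields a cheerio object on success,
+// or Errors.badUrl on a failed fetch:
+//   Resource.create('http://example.com/article')
+//     .then(function ($) { return $('p').length; });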
+
+var NYMagExtractor = {
+ domain: 'nymag.com',
+ content: {
+    // Order by most likely. Extractor will stop on first occurrence
+ selectors: ['div.article-content', 'section.body', 'article.article'],
+
+ // Selectors to remove from the extracted content
+ clean: ['.ad', '.single-related-story'],
+
+    // Object of transformations to make on matched elements
+    // Each key is the selector, each value is the tag to
+    // transform to.
+    // If a function is given, it should return a string
+    // to convert to or nothing (in which case it will not perform
+    // the transformation).
+ transforms: {
+ // Convert h1s to h2s
+ h1: 'h2',
+
+ // Convert lazy-loaded noscript images to figures
+ noscript: function noscript($node) {
+ var $children = $node.children();
+ if ($children.length === 1 && $children.get(0).tagName === 'img') {
+ return 'figure';
+ }
+
+ return null;
+ }
+ }
+ },
+
+ title: {
+ selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']
+ },
+
+ author: {
+ selectors: ['.by-authors', '.lede-feature-author']
+ },
+
+ dek: {
+ selectors: ['.lede-feature-teaser']
+ },
+
+ date_published: {
+ selectors: ['time.article-timestamp[datetime]', 'time.article-timestamp']
+ }
+};
+
+var BloggerExtractor = {
+ domain: 'blogspot.com',
+ content: {
+ // Blogger is insane and does not load its content
+ // initially in the page, but it's all there
+ // in noscript
+ selectors: ['.post-content noscript'],
+
+ // Selectors to remove from the extracted content
+ clean: [],
+
+ // Convert the noscript tag to a div
+ transforms: {
+ noscript: 'div'
+ }
+ },
+
+ author: {
+ selectors: ['.post-author-name']
+ },
+
+ title: {
+ selectors: ['h2.title']
+ },
+
+ date_published: {
+ selectors: ['span.publishdate']
+ }
+};
+
+var WikipediaExtractor = {
+ domain: 'wikipedia.org',
+ content: {
+ selectors: ['#mw-content-text'],
+
+ defaultCleaner: false,
+
+ // transform top infobox to an image with caption
+ transforms: {
+ '.infobox img': function infoboxImg($node) {
+ var $parent = $node.parents('.infobox');
+ // Only prepend the first image in .infobox
+ if ($parent.children('img').length === 0) {
+ $parent.prepend($node);
+ }
+ },
+ '.infobox caption': 'figcaption',
+ '.infobox': 'figure'
+ },
+
+ // Selectors to remove from the extracted content
+ clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc']
+
+ },
+
+ author: 'Wikipedia Contributors',
+
+ title: {
+ selectors: ['h2.title']
+ },
+
+ date_published: {
+ selectors: ['#footer-info-lastmod']
+ }
+
+};
+
+var TwitterExtractor = {
+ domain: 'twitter.com',
+
+ content: {
+ transforms: {
+ // We're transforming essentially the whole page here.
+ // Twitter doesn't have nice selectors, so our initial
+ // selector grabs the whole page, then we're re-writing
+ // it to fit our needs before we clean it up.
+ '.permalink[role=main]': function permalinkRoleMain($node, $) {
+ var tweets = $node.find('.tweet');
+        var $tweetContainer = $('<div></div>');
+ $tweetContainer.append(tweets);
+ $node.replaceWith($tweetContainer);
+ },
+
+      // Twitter wraps @ mentions with <s> tags, which
+      // render as a strikethrough
+ s: 'span'
+ },
+
+ selectors: ['.permalink[role=main]'],
+
+ defaultCleaner: false,
+
+ clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']
+ },
+
+ author: {
+ selectors: ['.tweet.permalink-tweet .username']
+ },
+
+ date_published: {
+ selectors: ['.permalink-tweet ._timestamp[data-time-ms]']
+ }
+
+};
+
+var Extractors = {
+ 'nymag.com': NYMagExtractor,
+ 'blogspot.com': BloggerExtractor,
+ 'wikipedia.org': WikipediaExtractor,
+ 'twitter.com': TwitterExtractor
+};
+
+// Spacer images to be removed
+var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
+
+// A list of tags to strip from the output if we encounter them.
+var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
+
+// cleanAttributes
+var REMOVE_ATTRS = ['style', 'align'];
+var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
+ return '[' + selector + ']';
+});
+var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
+var WHITELIST_ATTRS = ['src', 'href', 'class', 'id', 'score'];
+var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
+
+// removeEmpty
+var REMOVE_EMPTY_TAGS = ['p'];
+var REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {
+ return tag + ':empty';
+}).join(',');
+
+// cleanTags
+var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
+
+// cleanHeaders
+var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
+var HEADER_TAG_LIST = HEADER_TAGS.join(',');
+
+// // CONTENT FETCHING CONSTANTS ////
+
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
+// 'form', // This is too generic, has too many false positives
+'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
+
+// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
+//
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+
+// A list of tags which, if found inside, should cause a <div /> to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into <p /> tags.
+var DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+
+// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
+var NON_TOP_CANDIDATE_TAGS = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
+
+var NON_TOP_CANDIDATE_TAGS_RE = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS.join('|') + ')$', 'i');
+
+var PHOTO_HINTS = ['figure', 'photo', 'image', 'caption'];
+var PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');
+
+// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
+'\\Bcopy'];
+
+// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');
+
+// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
+'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
+'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
+'promo', 'pr_', // autoblog - press release
+'related', 'respond', 'roundcontent', // lifehacker restricted content warning
+'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
+// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');
+
+// XPath to try to determine if a page is wordpress. Not always successful.
+var IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';
+
+// A list of words that, if found in link text or URLs, likely mean that
+// this link is not a next page link.
+var EXTRANEOUS_LINK_HINTS = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];
+var EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');
+
+// Match any phrase that looks like it could be page, or paging, or pagination
+var PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');
+
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
+var BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
+var BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');
+
+// The removal is implemented as a blacklist and whitelist; this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression, both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
+var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
+
+var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
+var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
+
+function stripUnlikelyCandidates($) {
+ // Loop through the provided document and remove any non-link nodes
+ // that are unlikely candidates for article content.
+ //
+ // Links are ignored because there are very often links to content
+ // that are identified as non-body-content, but may be inside
+ // article-like content.
+ //
+ // :param $: a cheerio object to strip nodes from
+ // :return $: the cleaned cheerio object
+ $('*').not('a').each(function (index, node) {
+ var $node = $(node);
+ var classes = $node.attr('class');
+ var id = $node.attr('id');
+ if (!id && !classes) return;
+
+ var classAndId = (classes || '') + ' ' + (id || '');
+ if (CANDIDATES_WHITELIST.test(classAndId)) {
+ return;
+ } else if (CANDIDATES_BLACKLIST.test(classAndId)) {
+ $node.remove();
+ }
+ });
+
+ return $;
+}
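+
+// e.g. a <div class="sidebar"> is removed (blacklist match), while a
+// <div class="rss-content entry-content"> is kept: 'rss' is blacklisted,
+// but 'content' matches the whitelist, which wins.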
+
+// ## NOTES:
+// Another good candidate for refactoring/optimizing.
+// Very imperative code, I don't love it. - AP
+
+
+// Given cheerio object, convert consecutive <br /> tags into
+// <p /> tags instead.
+//
+// :param $: A cheerio object
+
+function brsToPs($) {
+ var collapsing = false;
+ $('br').each(function (index, element) {
+ var nextElement = $(element).next().get(0);
+
+ if (nextElement && nextElement.tagName === 'br') {
+ collapsing = true;
+ $(element).remove();
+ } else if (collapsing) {
+ collapsing = false;
+ // $(element).replaceWith('')
+ paragraphize(element, $, true);
+ }
+ });
+
+ return $;
+}
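+
+// e.g. given 'hello<br /><br />world', the first br is removed and the
+// second is handed to paragraphize (below), yielding 'hello<p>world</p>'.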
+
+// Given a node, turn it into a P if it is not already a P, and
+// make sure it conforms to the constraints of a P tag (I.E. does
+// not contain any other block tags.)
+//
+// If the node is a <br />, it treats the following inline siblings
+// as if they were its children.
+//
+// :param node: The node to paragraphize; this is a raw node
+// :param $: The cheerio object to handle dom manipulation
+// :param br: Whether or not the passed node is a br
+
+function paragraphize(node, $) {
+ var br = arguments.length <= 2 || arguments[2] === undefined ? false : arguments[2];
+
+ var $node = $(node);
+
+ if (br) {
+ var sibling = node.nextSibling;
+    var p = $('<p></p>');
+
+ // while the next node is text or not a block level element
+ // append it to a new p node
+ while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {
+ var nextSibling = sibling.nextSibling;
+ $(sibling).appendTo(p);
+ sibling = nextSibling;
+ }
+
+ $node.replaceWith(p);
+ $node.remove();
+ return $;
+ }
+
+ return $;
+}
+
+function convertDivs($) {
+ $('div').each(function (index, div) {
+ var $div = $(div);
+ var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
+
+ if (convertable) {
+ convertNodeTo($div, $, 'p');
+ }
+ });
+
+ return $;
+}
+
+function convertSpans($) {
+ $('span').each(function (index, span) {
+ var $span = $(span);
+ var convertable = $span.parents('p, div').length === 0;
+ if (convertable) {
+ convertNodeTo($span, $, 'p');
+ }
+ });
+
+ return $;
+}
+
+// Loop through the provided doc, and convert any p-like elements to
+// actual paragraph tags.
+//
+// Things fitting these criteria:
+// * Multiple consecutive <br /> tags.
+// * <div /> tags without block level elements inside of them
+// * <span /> tags that are not children of <p /> or <div /> tags.
+//
+// :param $: A cheerio object to search
+// :return cheerio object with new p elements
+// (By-reference mutation, though. Returned just for convenience.)
+
+function convertToParagraphs($) {
+ $ = brsToPs($);
+ $ = convertDivs($);
+ $ = convertSpans($);
+
+ return $;
+}
+
+function convertNodeTo($node, $) {
+ var tag = arguments.length <= 2 || arguments[2] === undefined ? 'p' : arguments[2];
+
+ var node = $node.get(0);
+ if (!node) {
+ return $;
+ }
+
+ var _$node$get = $node.get(0);
+
+ var attribs = _$node$get.attribs;
+
+ var attribString = Reflect.ownKeys(attribs).map(function (key) {
+ return key + '=' + attribs[key];
+ }).join(' ');
+
+  $node.replaceWith('<' + tag + ' ' + attribString + '>' + $node.contents() + '</' + tag + '>');
+ return $;
+}
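+
+// e.g. convertNodeTo($('#main'), $, 'p') rewrites <div id="main">...</div>
+// as <p id=main>...</p>, carrying the existing attributes over (unquoted).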
+
+function cleanForHeight($img, $) {
+ var height = parseInt($img.attr('height'), 10);
+ var width = parseInt($img.attr('width'), 10) || 20;
+
+ // Remove images that explicitly have very small heights or
+ // widths, because they are most likely shims or icons,
+ // which aren't very useful for reading.
+ if ((height || 20) < 10 || width < 10) {
+ $img.remove();
+ } else if (height) {
+ // Don't ever specify a height on images, so that we can
+ // scale with respect to width without screwing up the
+ // aspect ratio.
+ $img.removeAttr('height');
+ }
+
+ return $;
+}
+
+// Cleans out images where the source string matches transparent/spacer/etc
+// TODO This seems very aggressive - AP
+function removeSpacers($img, $) {
+ if (SPACER_RE.test($img.attr('src'))) {
+ $img.remove();
+ }
+
+ return $;
+}
+
+function cleanImages($article, $) {
+ $article.find('img').each(function (index, img) {
+ var $img = $(img);
+
+ cleanForHeight($img, $);
+ removeSpacers($img, $);
+ });
+
+ return $;
+}
+
+function stripJunkTags(article, $) {
+ $(STRIP_OUTPUT_TAGS.join(','), article).remove();
+
+ return $;
+}
+
+// H1 tags are typically the article title, which should be extracted
+// by the title extractor instead. If there are fewer than 3 of them (<3),
+// strip them. Otherwise, turn 'em into H2s.
+
+function cleanHOnes(article, $) {
+ var $hOnes = $('h1', article);
+
+ if ($hOnes.length < 3) {
+ $hOnes.each(function (index, node) {
+ return $(node).remove();
+ });
+ } else {
+ $hOnes.each(function (index, node) {
+ convertNodeTo($(node), $, 'h2');
+ });
+ }
+
+ return $;
+}
+
+function removeAllButWhitelist($article) {
+ // $('*', article).each((index, node) => {
+ $article.find('*').each(function (index, node) {
+ node.attribs = Reflect.ownKeys(node.attribs).reduce(function (acc, attr) {
+ if (WHITELIST_ATTRS_RE.test(attr)) {
+ return _extends({}, acc, defineProperty({}, attr, node.attribs[attr]));
+ }
+
+ return acc;
+ }, {});
+ });
+}
+
+// function removeAttrs(article, $) {
+// REMOVE_ATTRS.forEach((attr) => {
+// $(`[${attr}]`, article).removeAttr(attr);
+// });
+// }
+
+// Remove attributes like style or align
+function cleanAttributes($article) {
+ removeAllButWhitelist($article);
+
+ return $article;
+}
+
+function removeEmpty($article, $) {
+ $article.find('p').each(function (index, p) {
+ var $p = $(p);
+ if ($p.text().trim() === '') $p.remove();
+ });
+
+ return $;
+}
+
+// // CONTENT FETCHING CONSTANTS ////
+
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
+
+// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
+//
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+
+// A list of tags which, if found inside, should cause a <div /> to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into <p /> tags.
+var DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+
+// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
+var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
+
+var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');
+
+// A list of selectors that specify, very clearly, either hNews or other
+// very content-specific style content, like Blogger templates.
+// More examples here: http://microformats.org/wiki/blog-post-formats
+var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
+
+var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
+var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');
+
+// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
+'\\Bcopy'];
+
+// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');
+
+// Readability publisher-specific guidelines
+var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');
+
+// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
+'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
+'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
+'promo', 'pr_', // autoblog - press release
+'related', 'respond', 'roundcontent', // lifehacker restricted content warning
+'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
+// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');
+
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
+var BLOCK_LEVEL_TAGS$1 = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
+var BLOCK_LEVEL_TAGS_RE$1 = new RegExp('^(' + BLOCK_LEVEL_TAGS$1.join('|') + ')$', 'i');
+
+// The removal is implemented as a blacklist and whitelist; this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression, both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
+var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
+var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
+var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
+var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
+
+// Get the score of a node based on its className and id.
+function getWeight(node) {
+ var classes = node.attr('class');
+ var id = node.attr('id');
+ var score = 0;
+
+ if (id) {
+ // if id exists, try to score on both positive and negative
+ if (POSITIVE_SCORE_RE$1.test(id)) {
+ score += 25;
+ }
+ if (NEGATIVE_SCORE_RE$1.test(id)) {
+ score -= 25;
+ }
+ }
+
+ if (classes) {
+ if (score === 0) {
+ // if classes exist and id did not contribute to score
+ // try to score on both positive and negative
+ if (POSITIVE_SCORE_RE$1.test(classes)) {
+ score += 25;
+ }
+ if (NEGATIVE_SCORE_RE$1.test(classes)) {
+ score -= 25;
+ }
+ }
+
+ // even if score has been set by id, add score for
+ // possible photo matches
+ // "try to keep photos if we can"
+ if (PHOTO_HINTS_RE$1.test(classes)) {
+ score += 10;
+ }
+
+ // add 25 if class matches entry-content-asset,
+ // a class apparently instructed for use in the
+ // Readability publisher guidelines
+ // https://www.readability.com/developers/guidelines
+ if (READABILITY_ASSET$1.test(classes)) {
+ score += 25;
+ }
+ }
+
+ return score;
+}
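+
+// Worked example: an element with id="entry" and class="figure" scores
+// +25 (id matches POSITIVE_SCORE_RE$1) +10 (class matches PHOTO_HINTS_RE$1)
+// = 35, while an element with class="sidebar" scores -25.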
+
+// returns the score of a node based on
+// the node's score attribute
+// returns null if no score set
+function getScore($node) {
+ return parseFloat($node.attr('score')) || null;
+}
+
+// return 1 for every comma in text
+function scoreCommas(text) {
+ return (text.match(/,/g) || []).length;
+}
+
+var idkRe = new RegExp('^(p|pre)$', 'i');
+
+function scoreLength(textLength) {
+ var tagName = arguments.length <= 1 || arguments[1] === undefined ? 'p' : arguments[1];
+
+ var chunks = textLength / 50;
+
+ if (chunks > 0) {
+ var lengthBonus = void 0;
+
+ // No idea why p or pre are being tamped down here
+ // but just following the source for now
+ // Not even sure why tagName is included here,
+ // since this is only being called from the context
+ // of scoreParagraph
+ if (idkRe.test(tagName)) {
+ lengthBonus = chunks - 2;
+ } else {
+ lengthBonus = chunks - 1.25;
+ }
+
+ return Math.min(Math.max(lengthBonus, 0), 3);
+ }
+
+ return 0;
+}
+
+// Score a paragraph using various methods. Things like number of
+// commas, etc. Higher is better.
+function scoreParagraph(node) {
+ var score = 1;
+ var text = node.text().trim();
+ var textLength = text.length;
+
+ // If this paragraph is less than 25 characters, don't count it.
+ if (textLength < 25) {
+ return 0;
+ }
+
+ // Add points for any commas within this paragraph
+ score += scoreCommas(text);
+
+ // For every 50 characters in this paragraph, add another point. Up
+ // to 3 points.
+ score += scoreLength(textLength);
+
+ // Articles can end with short paragraphs when people are being clever
+ // but they can also end with short paragraphs setting up lists of junk
+ // that we strip. This negative tweaks junk setup paragraphs just below
+ // the cutoff threshold.
+ if (text.slice(-1) === ':') {
+ score -= 1;
+ }
+
+ return score;
+}
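+
+// Worked example: a 150-character paragraph containing two commas scores
+// 1 (base) + 2 (commas) + 1 (length bonus: 150 / 50 - 2) = 4.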
+
+function setScore($node, $, score) {
+ $node.attr('score', score);
+ return $node;
+}
+
+function addScore($node, $, amount) {
+ try {
+ var score = getOrInitScore($node, $) + amount;
+ setScore($node, $, score);
+ } catch (e) {
+ // Ignoring; error occurs in scoreNode
+ }
+
+ return $node;
+}
+
+// Adds 1/4 of a child's score to its parent
+function addToParent(node, $, score) {
+ var parent = node.parent();
+ if (parent) {
+ addScore(parent, $, score * 0.25);
+ }
+
+ return node;
+}
+
+// gets and returns the score if it exists
+// if not, initializes a score based on
+// the node's tag type
+function getOrInitScore($node, $) {
+ var weightNodes = arguments.length <= 2 || arguments[2] === undefined ? true : arguments[2];
+
+ var score = getScore($node);
+
+ if (score) {
+ return score;
+ }
+
+ score = scoreNode($node);
+
+ if (weightNodes) {
+ score += getWeight($node);
+ }
+
+ addToParent($node, $, score);
+
+ return score;
+}
+
+// Score an individual node. Has some smarts for paragraphs, otherwise
+// just scores based on tag.
+function scoreNode($node) {
+ var _$node$get = $node.get(0);
+
+ var tagName = _$node$get.tagName;
+
+ // TODO: Consider ordering by most likely.
+ // E.g., if divs are a more common tag on a page,
+ // Could save doing that regex test on every node – AP
+
+ if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
+ return scoreParagraph($node);
+ } else if (tagName === 'div') {
+ return 5;
+ } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {
+ return 3;
+ } else if (BAD_TAGS$1.test(tagName)) {
+ return -3;
+ } else if (tagName === 'th') {
+ return -5;
+ }
+
+ return 0;
+}
+
+function convertSpans$1($node, $) {
+ if ($node.get(0)) {
+ var _$node$get = $node.get(0);
+
+ var tagName = _$node$get.tagName;
+
+
+ if (tagName === 'span') {
+ // convert spans to divs
+ convertNodeTo($node, $, 'div');
+ }
+ }
+}
+
+function addScoreTo($node, $, score) {
+ if ($node) {
+ convertSpans$1($node, $);
+ addScore($node, $, score);
+ }
+}
+
+function scorePs($, weightNodes) {
+ $('p, pre').not('[score]').each(function (index, node) {
+ // The raw score for this paragraph, before we add any parent/child
+ // scores.
+ var $node = $(node);
+ $node = setScore($node, $, getOrInitScore($node, $, weightNodes));
+
+ var $parent = $node.parent();
+ var rawScore = scoreNode($node);
+
+ addScoreTo($parent, $, rawScore, weightNodes);
+ if ($parent) {
+ // Add half of the individual content score to the
+ // grandparent
+ addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);
+ }
+ });
+
+ return $;
+}
+
+// score content. Parents get the full value of their children's
+// content score, grandparents half
+function scoreContent($) {
+ var weightNodes = arguments.length <= 1 || arguments[1] === undefined ? true : arguments[1];
+
+ // First, look for special hNews based selectors and give them a big
+ // boost, if they exist
+ HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {
+ var _ref2 = slicedToArray(_ref, 2);
+
+ var parentSelector = _ref2[0];
+ var childSelector = _ref2[1];
+
+ $(parentSelector + ' ' + childSelector).each(function (index, node) {
+ addScore($(node).parent(parentSelector), $, 80);
+ });
+ });
+
+ // Doubling this again
+ // Previous solution caused a bug
+ // in which parents weren't retaining
+ // scores. This is not ideal, and
+ // should be fixed.
+ scorePs($, weightNodes);
+ scorePs($, weightNodes);
+
+ return $;
+}
+
+var NORMALIZE_RE = /\s{2,}/g;
+
+function normalizeSpaces(text) {
+ return text.replace(NORMALIZE_RE, ' ').trim();
+}
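+
+// e.g. normalizeSpaces('  foo\n\n  bar ') === 'foo bar'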
+
+// Given a node type to search for, and a list of regular expressions,
+// look to see if this extraction can be found in the URL. Expects
+// that each expression in r_list will return group(1) as the proper
+// string to be cleaned.
+// Only used for date_published currently.
+function extractFromUrl(url, regexList) {
+ var matchRe = regexList.find(function (re) {
+ return re.test(url);
+ });
+ if (matchRe) {
+ return matchRe.exec(url)[1];
+ }
+
+ return null;
+}
+
+// An expression that looks to try to find the page digit within a URL, if
+// it exists.
+// Matches:
+// page=1
+// pg=1
+// p=1
+// paging=12
+// pag=7
+// pagination/1
+// paging/88
+// pa/83
+// p/11
+//
+// Does not match:
+// pg=102
+// page:2
+var PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
+
+var HAS_ALPHA_RE = /[a-z]/i;
+
+var IS_ALPHA_RE = /^[a-z]+$/i;
+var IS_DIGIT_RE = /^[0-9]+$/i;
+
+function pageNumFromUrl(url) {
+ var matches = url.match(PAGE_IN_HREF_RE);
+ if (!matches) return null;
+
+ var pageNum = parseInt(matches[6], 10);
+
+ // Return pageNum < 100, otherwise
+ // return null
+ return pageNum < 100 ? pageNum : null;
+}
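+
+// e.g. (hypothetical URLs)
+//   pageNumFromUrl('http://example.com/foo?page=2');   // => 2
+//   pageNumFromUrl('http://example.com/foo?page=102'); // => null (>= 100)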
+
+function removeAnchor(url) {
+ return url.split('#')[0].replace(/\/$/, '');
+}
+
+function isGoodSegment(segment, index, firstSegmentHasLetters) {
+ var goodSegment = true;
+
+ // If this is purely a number, and it's the first or second
+ // url_segment, it's probably a page number. Remove it.
+ if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {
+    goodSegment = false;
+ }
+
+ // If this is the first url_segment and it's just "index",
+ // remove it
+ if (index === 0 && segment.toLowerCase() === 'index') {
+ goodSegment = false;
+ }
+
+ // If our first or second url_segment is smaller than 3 characters,
+ // and the first url_segment had no alphas, remove it.
+ if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
+ goodSegment = false;
+ }
+
+ return goodSegment;
+}
+
+// Take a URL, and return the article base of said URL. That is, no
+// pagination data exists in it. Useful for comparing to other links
+// that might have pagination data within them.
+function articleBaseUrl(url, parsed) {
+ var parsedUrl = parsed || URL.parse(url);
+ var protocol = parsedUrl.protocol;
+ var host = parsedUrl.host;
+ var path = parsedUrl.path;
+
+
+ var firstSegmentHasLetters = false;
+ var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {
+ var segment = rawSegment;
+
+ // Split off and save anything that looks like a file type.
+ if (segment.includes('.')) {
+ var _segment$split = segment.split('.');
+
+ var _segment$split2 = slicedToArray(_segment$split, 2);
+
+ var possibleSegment = _segment$split2[0];
+ var fileExt = _segment$split2[1];
+
+ if (IS_ALPHA_RE.test(fileExt)) {
+ segment = possibleSegment;
+ }
+ }
+
+ // If our first or second segment has anything looking like a page
+ // number, remove it.
+ if (PAGE_IN_HREF_RE.test(segment) && index < 2) {
+ segment = segment.replace(PAGE_IN_HREF_RE, '');
+ }
+
+ // If we're on the first segment, check to see if we have any
+ // characters in it. The first segment is actually the last bit of
+ // the URL, and this will be helpful to determine if we're on a URL
+ // segment that looks like "/2/" for example.
+ if (index === 0) {
+ firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);
+ }
+
+ // If it's not marked for deletion, push it to cleaned_segments.
+ if (isGoodSegment(segment, index, firstSegmentHasLetters)) {
+ acc.push(segment);
+ }
+
+ return acc;
+ }, []);
+
+ return protocol + '//' + host + cleanedSegments.reverse().join('/');
+}
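+
+// e.g. (hypothetical URL) articleBaseUrl('http://example.com/blog/my-post/2')
+// returns 'http://example.com/blog/my-post'; the trailing numeric segment
+// is judged to be a page number and dropped.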
+
+// Given a string, return True if it appears to have an ending sentence
+// within it, false otherwise.
+var SENTENCE_END_RE = new RegExp('\\.( |$)');
+function hasSentenceEnd(text) {
+ return SENTENCE_END_RE.test(text);
+}
+
+// Now that we have a top_candidate, look through the siblings of
+// it to see if any of them are decently scored. If they are, they
+// may be split parts of the content (Like two divs, a preamble and
+// a body.) Example:
+// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
+function mergeSiblings($candidate, topScore, $) {
+ if (!$candidate.parent().length) {
+ return $candidate;
+ }
+
+ var siblingScoreThreshold = Math.max(10, topScore * 0.25);
+  var wrappingDiv = $('<div></div>');
+
+ $candidate.parent().children().each(function (index, sibling) {
+ var $sibling = $(sibling);
+ // Ignore tags like BR, HR, etc
+ if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
+ return null;
+ }
+
+ var siblingScore = getScore($sibling);
+ if (siblingScore) {
+ if ($sibling === $candidate) {
+ wrappingDiv.append($sibling);
+ } else {
+ var contentBonus = 0;
+ var density = linkDensity($sibling);
+
+ // If sibling has a very low link density,
+ // give it a small bonus
+ if (density < 0.05) {
+ contentBonus += 20;
+ }
+
+ // If sibling has a high link density,
+ // give it a penalty
+ if (density >= 0.5) {
+ contentBonus -= 20;
+ }
+
+ // If sibling node has the same class as
+ // candidate, give it a bonus
+ if ($sibling.attr('class') === $candidate.attr('class')) {
+ contentBonus += topScore * 0.2;
+ }
+
+ var newScore = siblingScore + contentBonus;
+
+ if (newScore >= siblingScoreThreshold) {
+ return wrappingDiv.append($sibling);
+ } else if (sibling.tagName === 'p') {
+ var siblingContent = $sibling.text();
+ var siblingContentLength = textLength(siblingContent);
+
+ if (siblingContentLength > 80 && density < 0.25) {
+ return wrappingDiv.append($sibling);
+ } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
+ return wrappingDiv.append($sibling);
+ }
+ }
+ }
+ }
+
+ return null;
+ });
+
+ return wrappingDiv;
+}
+
+// After we've calculated scores, loop through all of the possible
+// candidate nodes we found and find the one with the highest score.
+function findTopCandidate($) {
+ var $candidate = void 0;
+ var topScore = 0;
+
+ $('[score]').each(function (index, node) {
+ // Ignore tags like BR, HR, etc
+ if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
+ return;
+ }
+
+ var $node = $(node);
+ var score = getScore($node);
+
+ if (score > topScore) {
+ topScore = score;
+ $candidate = $node;
+ }
+ });
+
+ // If we don't have a candidate, return the body
+ // or whatever the first element is
+ if (!$candidate) {
+ return $('body') || $('*').first();
+ }
+
+ $candidate = mergeSiblings($candidate, topScore, $);
+
+ return $candidate;
+}
+
+function removeUnlessContent($node, $, weight) {
+ // Explicitly save entry-content-asset tags, which are
+ // noted as valuable in the Publisher guidelines. For now
+ // this works everywhere. We may want to consider making
+ // this less of a sure-thing later.
+ if ($node.hasClass('entry-content-asset')) {
+ return;
+ }
+
+ var content = normalizeSpaces($node.text());
+
+ if (scoreCommas(content) < 10) {
+ var pCount = $('p', $node).length;
+ var inputCount = $('input', $node).length;
+
+ // Looks like a form, too many inputs.
+ if (inputCount > pCount / 3) {
+ $node.remove();
+ return;
+ }
+
+ var contentLength = content.length;
+ var imgCount = $('img', $node).length;
+
+ // Content is too short, and there are no images, so
+ // this is probably junk content.
+ if (contentLength < 25 && imgCount === 0) {
+ $node.remove();
+ return;
+ }
+
+ var density = linkDensity($node);
+
+ // Too high of link density, is probably a menu or
+ // something similar.
+ // console.log(weight, density, contentLength)
+ if (weight < 25 && density > 0.2 && contentLength > 75) {
+ $node.remove();
+ return;
+ }
+
+ // Too high of a link density, despite the score being
+ // high.
+ if (weight >= 25 && density > 0.5) {
+ // Don't remove the node if it's a list and the
+ // previous sibling starts with a colon though. That
+ // means it's probably content.
+ var tagName = $node.get(0).tagName;
+ var nodeIsList = tagName === 'ol' || tagName === 'ul';
+ if (nodeIsList) {
+ var previousNode = $node.prev();
+ if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
+ return;
+ }
+ }
+
+ $node.remove();
+ return;
+ }
+
+ var scriptCount = $('script', $node).length;
+
+ // Too many script tags, not enough content.
+ if (scriptCount > 0 && contentLength < 150) {
+ $node.remove();
+ return;
+ }
+ }
+}
+
+// Given an article, clean it of some superfluous content specified by
+// tags. Things like forms, ads, etc.
+//
+// Tags is an array of tag name's to search through. (like div, form,
+// etc)
+//
+// Return this same doc.
+function cleanTags($article, $) {
+ $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
+ var $node = $(node);
+ var weight = getScore($node);
+ if (!weight) {
+ weight = getOrInitScore($node, $);
+ setScore($node, $, weight);
+ }
+
+ // drop node if its weight is < 0
+ if (weight < 0) {
+ $node.remove();
+ } else {
+      // determine if node seems like content
+ removeUnlessContent($node, $, weight);
+ }
+ });
+
+ return $;
+}
+
+function cleanHeaders($article, $) {
+ var title = arguments.length <= 2 || arguments[2] === undefined ? '' : arguments[2];
+
+ $(HEADER_TAG_LIST, $article).each(function (index, header) {
+ var $header = $(header);
+ // Remove any headers that appear before all other p tags in the
+ // document. This probably means that it was part of the title, a
+ // subtitle or something else extraneous like a datestamp or byline,
+ // all of which should be handled by other metadata handling.
+ if ($($header, $article).prevAll('p').length === 0) {
+ return $header.remove();
+ }
+
+ // Remove any headers that match the title exactly.
+ if (normalizeSpaces($(header).text()) === title) {
+ return $header.remove();
+ }
+
+ // If this header has a negative weight, it's probably junk.
+ // Get rid of it.
+ if (getWeight($(header)) < 0) {
+ return $header.remove();
+ }
+
+ return $header;
+ });
+
+ return $;
+}
+
+// Rewrite the tag name to div if it's a top level node like body or
+// html to avoid later complications with multiple body tags.
+
+function rewriteTopLevel(article, $) {
+ // I'm not using context here because
+ // it's problematic when converting the
+ // top-level/root node - AP
+ $ = convertNodeTo($('html'), $, 'div');
+ $ = convertNodeTo($('body'), $, 'div');
+
+ return $;
+}
+
+function absolutize($, rootUrl, attr, $content) {
+ $('[' + attr + ']', $content).each(function (_, node) {
+ var url = node.attribs[attr];
+ var absoluteUrl = URL.resolve(rootUrl, url);
+
+ node.attribs[attr] = absoluteUrl;
+ });
+}
+
+function makeLinksAbsolute($content, $, url) {
+ ['href', 'src'].forEach(function (attr) {
+ return absolutize($, url, attr, $content);
+ });
+
+ return $content;
+}
+
+function textLength(text) {
+ return text.trim().replace(/\s+/g, ' ').length;
+}
+
+// Determines what percentage of the text
+// in a node is link text
+// Takes a node, returns a float
+function linkDensity($node) {
+ var totalTextLength = textLength($node.text());
+
+ var linkText = $node.find('a').text();
+ var linkLength = textLength(linkText);
+
+ if (totalTextLength > 0) {
+ return linkLength / totalTextLength;
+ } else if (totalTextLength === 0 && linkLength > 0) {
+ return 1;
+ }
+
+ return 0;
+}
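+
+// e.g. a node with 100 characters of text, 25 of which sit inside <a>
+// tags, has a linkDensity of 0.25.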
+
+// Given a node type to search for, and a list of meta tag names to
+// search for, find a meta tag associated.
+
+function extractFromMeta($, metaNames, cachedNames) {
+ var cleanTags = arguments.length <= 3 || arguments[3] === undefined ? true : arguments[3];
+
+ var foundNames = metaNames.filter(function (name) {
+ return cachedNames.indexOf(name) !== -1;
+ });
+
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ var _loop = function _loop() {
+ var name = _step.value;
+
+ var type = 'name';
+ var value = 'value';
+
+ var nodes = $('meta[' + type + '="' + name + '"]');
+
+ // Get the unique value of every matching node, in case there
+ // are two meta tags with the same name and value.
+ // Remove empty values.
+ var values = nodes.map(function (index, node) {
+ return $(node).attr(value);
+ }).toArray().filter(function (text) {
+ return text !== '';
+ });
+
+ // If we have more than one value for the same name, we have a
+ // conflict and can't trust any of them. Skip this name. If we have
+ // zero, that means our meta tags had no values. Skip this name
+ // also.
+ if (values.length === 1) {
+ var metaValue = void 0;
+ // Meta values that contain HTML should be stripped, as they
+ // weren't subject to cleaning previously.
+ if (cleanTags) {
+ metaValue = stripTags(values[0], $);
+ } else {
+ metaValue = values[0];
+ }
+
+ return {
+ v: metaValue
+ };
+ }
+ };
+
+ for (var _iterator = foundNames[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var _ret = _loop();
+
+ if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
+ }
+
+ // If nothing is found, return null
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
+ }
+
+ return null;
+}
+
+function isGoodNode($node, maxChildren) {
+ // If it has a number of children, it's more likely a container
+ // element. Skip it.
+ if ($node.children().length > maxChildren) {
+ return false;
+ }
+ // If it looks to be within a comment, skip it.
+ if (withinComment($node)) {
+ return false;
+ }
+
+ return true;
+}
+
+// Given a list of selectors, find content that may
+// be extractable from the document. This is for flat
+// meta-information, like author, title, date published, etc.
+function extractFromSelectors($, selectors) {
+ var maxChildren = arguments.length <= 2 || arguments[2] === undefined ? 1 : arguments[2];
+ var textOnly = arguments.length <= 3 || arguments[3] === undefined ? true : arguments[3];
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ for (var _iterator = selectors[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var selector = _step.value;
+
+ var nodes = $(selector);
+
+ // If we didn't get exactly one of this selector, this may be
+ // a list of articles or comments. Skip it.
+ if (nodes.length === 1) {
+ var $node = $(nodes[0]);
+
+ if (isGoodNode($node, maxChildren)) {
+ var content = void 0;
+ if (textOnly) {
+ content = $node.text();
+ } else {
+ content = $node.html();
+ }
+
+ if (content) {
+ return content;
+ }
+ }
+ }
+ }
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
+ }
+
+ return null;
+}
+
+// strips all tags from a string of text
+function stripTags(text, $) {
+ // Wrapping text in html element prevents errors when text
+ // has no html
+ var cleanText = $('' + text + '').text();
+ return cleanText === '' ? text : cleanText;
+}
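+
+// e.g. stripTags('<em>hello</em> world', $) === 'hello world'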
+
+function withinComment($node) {
+ var parents = $node.parents().toArray();
+ var commentParent = parents.find(function (parent) {
+ var classAndId = parent.attribs.class + ' ' + parent.attribs.id;
+ return classAndId.includes('comment');
+ });
+
+ return commentParent !== undefined;
+}
+
+// Given a node, determine if it's article-like enough to return
+// param: node (a cheerio node)
+// return: boolean
+
+function nodeIsSufficient($node) {
+ return $node.text().trim().length >= 100;
+}
+
+function isWordpress($) {
+ return $(IS_WP_SELECTOR).length > 0;
+}
+
+// CLEAN AUTHOR CONSTANTS
+var CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
+// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
+
+// CLEAN DEK CONSTANTS
+var TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
+// CLEAN DATE PUBLISHED CONSTANTS
+var MS_DATE_STRING = /^\d{13}$/i;
+var SEC_DATE_STRING = /^\d{10}$/i;
+var CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i;
+var TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i;
+var TIME_MERIDIAN_DOTS_RE = /\.m\./i;
+var months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];
+var allMonths = months.join('|');
+var timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';
+var timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
+var SPLIT_DATE_STRING = new RegExp('(' + timestamp1 + ')|(' + timestamp2 + ')|([0-9]{1,4})|(' + allMonths + ')', 'ig');
+
+// CLEAN TITLE CONSTANTS
+// A regular expression that will match separating characters on a
+// title, that usually denote breadcrumbs or something similar.
+var TITLE_SPLITTERS_RE = /(: | - | \| )/g;
+
+var DOMAIN_ENDINGS_RE = new RegExp('\\.com$|\\.net$|\\.org$|\\.co\\.uk$', 'g');
+
+// Take an author string (like 'By David Smith ') and clean it to
+// just the name(s): 'David Smith'.
+function cleanAuthor(author) {
+ return author.replace(CLEAN_AUTHOR_RE, '$2').trim();
+}
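+
+// Illustrative usage (commented-out sketch; inputs are hypothetical):
+//
+//   cleanAuthor('posted by: David Smith ');  // => 'David Smith'
+//   cleanAuthor('David Smith');              // => 'David Smith' (no byline prefix, no-op)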
+
+function clean$1(leadImageUrl) {
+ leadImageUrl = leadImageUrl.trim();
+ if (validUrl.isWebUri(leadImageUrl)) {
+ return leadImageUrl;
+ }
+
+ return null;
+}
+
+// Take a dek HTML fragment, and return the cleaned version of it.
+// Return null if the dek wasn't good enough.
+function cleanDek(dek, _ref) {
+ var $ = _ref.$;
+
+ // Sanity check that we didn't get too short or long of a dek.
+ if (dek.length > 1000 || dek.length < 5) return null;
+
+ var dekText = stripTags(dek, $);
+
+ // Plain text links shouldn't exist in the dek. If we have some, it's
+ // not a good dek - bail.
+ if (TEXT_LINK_RE.test(dekText)) return null;
+
+ return dekText.trim();
+}
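+
+// Illustrative usage (commented-out sketch; assumes $ is a cheerio instance):
+//
+//   cleanDek('A concise article summary.', { $: $ });
+//   // => 'A concise article summary.'
+//   cleanDek('Read more at http://example.com', { $: $ });
+//   // => null (a plain-text link disqualifies the dek)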
+
+// Is there a compelling reason to use moment here?
+// Mostly only being used for the isValid() method,
+// but could just check for 'Invalid Date' string.
+
+function cleanDateString(dateString) {
+ return (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1').trim();
+}
+
+// Take a date published string, and hopefully return a date out of
+// it. Return null if we fail.
+function cleanDatePublished(dateString) {
+ // If string is in milliseconds or seconds, convert to int
+ if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {
+ dateString = parseInt(dateString, 10);
+ }
+
+ var date = moment(new Date(dateString));
+
+ if (!date.isValid()) {
+ dateString = cleanDateString(dateString);
+ date = moment(new Date(dateString));
+ }
+
+ return date.isValid() ? date.toISOString() : null;
+}
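+
+// Illustrative usage (commented-out sketch; inputs are hypothetical):
+//
+//   cleanDatePublished('2016-09-28');     // => '2016-09-28T00:00:00.000Z'
+//   cleanDatePublished('1475020800000');  // 13-digit ms timestamp, same instant
+//   cleanDatePublished('not a date');     // => null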
+
+// Clean our article content, returning a new, cleaned node.
+
+function extractCleanNode(article, _ref) {
+ var $ = _ref.$;
+ var _ref$cleanConditional = _ref.cleanConditionally;
+ var cleanConditionally = _ref$cleanConditional === undefined ? true : _ref$cleanConditional;
+ var _ref$title = _ref.title;
+ var title = _ref$title === undefined ? '' : _ref$title;
+ var _ref$url = _ref.url;
+ var url = _ref$url === undefined ? '' : _ref$url;
+
+ // Rewrite the tag name to div if it's a top level node like body or
+ // html to avoid later complications with multiple body tags.
+ rewriteTopLevel(article, $);
+
+ // Drop small images and spacer images
+ cleanImages(article, $);
+
+ // Drop certain tags like <title>, etc
+ // This is -mostly- for cleanliness, not security.
+ stripJunkTags(article, $);
+
+ // H1 tags are typically the article title, which should be extracted
+ // by the title extractor instead. If there's less than 3 of them (<3),
+ // strip them. Otherwise, turn 'em into H2s.
+ cleanHOnes(article, $);
+
+ // Clean headers
+ cleanHeaders(article, $, title);
+
+ // Make links absolute
+ makeLinksAbsolute(article, $, url);
+
+ // Remove unnecessary attributes
+ cleanAttributes(article);
+
+ // We used to clean UL's and OL's here, but it was leading to
+ // too many in-article lists being removed. Consider a better
+ // way to detect menus particularly and remove them.
+ cleanTags(article, $, cleanConditionally);
+
+ // Remove empty paragraph nodes
+ removeEmpty(article, $);
+
+ return article;
+}
+
+function cleanTitle(title, _ref) {
+ var url = _ref.url;
+ var $ = _ref.$;
+
+ // If title has |, :, or - in it, see if
+ // we can clean it up.
+ if (TITLE_SPLITTERS_RE.test(title)) {
+ title = resolveSplitTitle(title, url);
+ }
+
+ // Final sanity check that we didn't get a crazy title.
+ // if (title.length > 150 || title.length < 15) {
+ if (title.length > 150) {
+ // If we did, return h1 from the document if it exists
+ var h1 = $('h1');
+ if (h1.length === 1) {
+ title = h1.text();
+ }
+ }
+
+ // strip any html tags in the title text
+ return stripTags(title, $).trim();
+}
+
+function extractBreadcrumbTitle(splitTitle, text) {
+ // This must be a very breadcrumbed title, like:
+ // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
+ // NYTimes - Blogs - Bits - The Best Gadgets on Earth
+ if (splitTitle.length >= 6) {
+ var _ret = function () {
+ // Look to see if we can find a breadcrumb splitter that happens
+ // more than once. If we can, we'll be able to better pull out
+ // the title.
+ var termCounts = splitTitle.reduce(function (acc, titleText) {
+ acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
+ return acc;
+ }, {});
+
+ var _Reflect$ownKeys$redu = Reflect.ownKeys(termCounts).reduce(function (acc, key) {
+ if (acc[1] < termCounts[key]) {
+ return [key, termCounts[key]];
+ }
+
+ return acc;
+ }, [0, 0]);
+
+ var _Reflect$ownKeys$redu2 = slicedToArray(_Reflect$ownKeys$redu, 2);
+
+ var maxTerm = _Reflect$ownKeys$redu2[0];
+ var termCount = _Reflect$ownKeys$redu2[1];
+
+ // We found a splitter that was used more than once, so it
+ // is probably the breadcrumber. Split our title on that instead.
+ // Note: max_term should be <= 4 characters, so that " >> "
+ // will match, but nothing longer than that.
+
+ if (termCount >= 2 && maxTerm.length <= 4) {
+ splitTitle = text.split(maxTerm);
+ }
+
+ // slice(-1) returns an array; unwrap it so both ends are strings
+ var splitEnds = [splitTitle[0], splitTitle.slice(-1)[0]];
+ var longestEnd = splitEnds.reduce(function (acc, end) {
+ return acc.length > end.length ? acc : end;
+ }, '');
+
+ if (longestEnd.length > 10) {
+ return {
+ v: longestEnd
+ };
+ }
+
+ return {
+ v: text
+ };
+ }();
+
+ if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
+ }
+
+ return null;
+}
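+
+// Illustrative trace (commented-out sketch; the title is hypothetical):
+//
+//   resolveSplitTitle('The Best Gadgets on Earth : Bits : Blogs : NYTimes.com');
+//   // The ' : ' splitter repeats, so the title is re-split on it and the
+//   // longest end segment wins: 'The Best Gadgets on Earth ' (trailing
+//   // whitespace is trimmed later by cleanTitle).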
+
+function cleanDomainFromTitle(splitTitle, url) {
+ // Search the ends of the title, looking for bits that fuzzy match
+ // the URL too closely. If one is found, discard it and return the
+ // rest.
+ //
+ // Strip out the big TLDs - it just makes the matching a bit more
+ // accurate. Not the end of the world if it doesn't strip right.
+ var _URL$parse = URL.parse(url);
+
+ var host = _URL$parse.host;
+
+ var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
+
+ var startSlug = splitTitle[0].toLowerCase().replace(/ /g, '');
+ var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
+
+ if (startSlugRatio > 0.4 && startSlug.length > 5) {
+ return splitTitle.slice(2).join('');
+ }
+
+ var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(/ /g, '');
+ var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
+
+ if (endSlugRatio > 0.4 && endSlug.length >= 5) {
+ return splitTitle.slice(0, -2).join('');
+ }
+
+ return null;
+}
+
+// Given a title with separators in it (colons, dashes, etc),
+// resolve whether any of the segments should be removed.
+function resolveSplitTitle(title) {
+ var url = arguments.length <= 1 || arguments[1] === undefined ? '' : arguments[1];
+
+ // Splits while preserving splitters, like:
+ // ['The New New York', ' - ', 'The Washington Post']
+ var splitTitle = title.split(TITLE_SPLITTERS_RE);
+ if (splitTitle.length === 1) {
+ return title;
+ }
+
+ var newTitle = extractBreadcrumbTitle(splitTitle, title);
+ if (newTitle) return newTitle;
+
+ newTitle = cleanDomainFromTitle(splitTitle, url);
+ if (newTitle) return newTitle;
+
+ // Fuzzy ratio didn't find anything, so this title is probably legit.
+ // Just return it all.
+ return title;
+}
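+
+// Illustrative usage (commented-out sketch; the URL is hypothetical and the
+// result relies on wuzzy's fuzzy match between the end segment and the host):
+//
+//   resolveSplitTitle(
+//     'The New New York - The Washington Post',
+//     'https://www.washingtonpost.com/news/'
+//   );
+//   // => 'The New New York'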
+
+var Cleaners = {
+ author: cleanAuthor,
+ lead_image_url: clean$1,
+ dek: cleanDek,
+ date_published: cleanDatePublished,
+ content: extractCleanNode,
+ title: cleanTitle
+};
+
+// Using a variety of scoring techniques, extract the content most
+// likely to be article text.
+//
+// If strip_unlikely_candidates is True, remove any elements that
+// match certain criteria first. (Like, does this element have a
+// classname of "comment")
+//
+// If weight_nodes is True, use classNames and IDs to determine the
+// worthiness of nodes.
+//
+// Returns the top candidate node as a cheerio selection
+function extractBestNode($, opts) {
+ // clone the node so we can get back to our
+ // initial parsed state if needed
+ // TODO Do I need this? – AP
+ // let $root = $.root().clone()
+
+
+ if (opts.stripUnlikelyCandidates) {
+ $ = stripUnlikelyCandidates($);
+ }
+
+ $ = convertToParagraphs($);
+ $ = scoreContent($, opts.weightNodes);
+ var $topCandidate = findTopCandidate($);
+
+ return $topCandidate;
+}
+
+var GenericContentExtractor = {
+ defaultOpts: {
+ stripUnlikelyCandidates: true,
+ weightNodes: true,
+ cleanConditionally: true
+ },
+
+ // Extract the content for this resource - initially, pass in our
+ // most restrictive opts which will return the highest quality
+ // content. On each failure, retry with slightly more lax opts.
+ //
+ // :param return_type: string. If "node", should return the content
+ // as a cheerio node rather than as an HTML string.
+ //
+ // Opts:
+ // stripUnlikelyCandidates: Remove any elements that match
+ // non-article-like criteria first.(Like, does this element
+ // have a classname of "comment")
+ //
+ // weightNodes: Modify an elements score based on whether it has
+ // certain classNames or IDs. Examples: Subtract if a node has
+ // a className of 'comment', Add if a node has an ID of
+ // 'entry-content'.
+ //
+ // cleanConditionally: Clean the node to return of some
+ // superfluous content. Things like forms, ads, etc.
+ extract: function extract(_ref, opts) {
+ var $ = _ref.$;
+ var html = _ref.html;
+ var title = _ref.title;
+ var url = _ref.url;
+
+ opts = _extends({}, this.defaultOpts, opts);
+
+ $ = $ || cheerio.load(html);
+
+ // Cascade through our extraction-specific opts in an ordered fashion,
+ // turning them off as we try to extract content.
+ var node = this.getContentNode($, title, url, opts);
+
+ if (nodeIsSufficient(node)) {
+ return this.cleanAndReturnNode(node, $);
+ }
+
+ // We didn't succeed on first pass, one by one disable our
+ // extraction opts and try again.
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ for (var _iterator = Reflect.ownKeys(opts).filter(function (k) {
+ return opts[k] === true;
+ })[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var key = _step.value;
+
+ opts[key] = false;
+ $ = cheerio.load(html);
+
+ node = this.getContentNode($, title, url, opts);
+
+ if (nodeIsSufficient(node)) {
+ break;
+ }
+ }
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
+ }
+
+ return this.cleanAndReturnNode(node, $);
+ },
+
+
+ // Get node given current options
+ getContentNode: function getContentNode($, title, url, opts) {
+ return extractCleanNode(extractBestNode($, opts), {
+ $: $,
+ cleanConditionally: opts.cleanConditionally,
+ title: title,
+ url: url
+ });
+ },
+
+
+ // By the time we get here, either we're at our last-resort node or we
+ // broke out early. Make sure we at least have -something- before we
+ // move forward.
+ cleanAndReturnNode: function cleanAndReturnNode(node, $) {
+ if (!node) {
+ return null;
+ }
+
+ return normalizeSpaces($.html(node));
+
+ // if return_type == "html":
+ // return normalize_spaces(node_to_html(node))
+ // else:
+ // return node
+ }
+};
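+
+// Illustrative usage (commented-out sketch; the html value is hypothetical
+// and would need roughly 100+ characters of text to satisfy nodeIsSufficient):
+//
+//   GenericContentExtractor.extract(
+//     { html: '<html><body><article><p>...</p></article></body></html>',
+//       title: 'Example', url: 'http://example.com/post' },
+//     {}
+//   );
+//   // => the cleaned article content as an HTML string, or null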
+
+// TODO: It would be great if we could merge the meta and selector lists into
+// a list of objects, because we could then rank them better. For example,
+// .hentry .entry-title is far better suited than a generic <meta> title tag.
+
+// An ordered list of meta tag names that denote likely article titles. All
+// attributes should be lowercase for faster case-insensitive matching. From
+// most distinct to least distinct.
+var STRONG_TITLE_META_TAGS = ['tweetmeme-title', 'dc.title', 'rbtitle', 'headline', 'title'];
+
+// og:title is weak because it typically contains context that we don't like,
+// for example the source site's name. Gotta get that brand into facebook!
+var WEAK_TITLE_META_TAGS = ['og:title'];
+
+// An ordered list of CSS selectors to find likely article titles. From
+// most explicit to least explicit.
+var STRONG_TITLE_SELECTORS = ['.hentry .entry-title', 'h1#articleHeader', 'h1.articleHeader', 'h1.article', '.instapaper_title', '#meebo-title'];
+
+var WEAK_TITLE_SELECTORS = ['article h1', '#entry-title', '.entry-title', '#entryTitle', '#entrytitle', '.entryTitle', '.entrytitle', '#articleTitle', '.articleTitle', 'post post-title', 'h1.title', 'h2.article', 'h1', 'html head title', 'title'];
+
+var GenericTitleExtractor = {
+ extract: function extract(_ref) {
+ var $ = _ref.$;
+ var url = _ref.url;
+ var metaCache = _ref.metaCache;
+
+ // First, check to see if we have a matching meta tag that we can make
+ // use of that is strongly associated with the headline.
+ var title = void 0;
+
+ title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);
+ if (title) return cleanTitle(title, { url: url, $: $ });
+
+ // Second, look through our content selectors for the most likely
+ // article title that is strongly associated with the headline.
+ title = extractFromSelectors($, STRONG_TITLE_SELECTORS);
+ if (title) return cleanTitle(title, { url: url, $: $ });
+
+ // Third, check for weaker meta tags that may match.
+ title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);
+ if (title) return cleanTitle(title, { url: url, $: $ });
+
+ // Last, look for weaker selector tags that may match.
+ title = extractFromSelectors($, WEAK_TITLE_SELECTORS);
+ if (title) return cleanTitle(title, { url: url, $: $ });
+
+ // If no matches, return an empty string
+ return '';
+ }
+};
+
+// An ordered list of meta tag names that denote likely article authors. All
+// attributes should be lowercase for faster case-insensitive matching. From
+// most distinct to least distinct.
+//
+// Note: "author" is too often the -developer- of the page, so it is not
+// added here.
+var AUTHOR_META_TAGS = ['byl', 'clmst', 'dc.author', 'dcsext.author', 'dc.creator', 'rbauthors', 'authors'];
+
+var AUTHOR_MAX_LENGTH = 300;
+
+// An ordered list of CSS selectors to find likely article authors. From
+// most explicit to least explicit.
+var AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .vcard .fn', '.byline.vcard .fn', '.byline .vcard .fn', '.byline .by .author', '.byline .by', '.byline .author', '.post-author.vcard', '.post-author .vcard', 'a[rel=author]', '#by_author', '.by_author', '#entryAuthor', '.entryAuthor', '.byline a[href*=author]', '#author .authorname', '.author .authorname', '#author', '.author', '.articleauthor', '.ArticleAuthor', '.byline'];
+
+// An ordered list of Selectors to find likely article authors, with
+// regular expression for content.
+var bylineRe = /^[\n\s]*By/i;
+var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
+
+var GenericAuthorExtractor = {
+ extract: function extract(_ref) {
+ var $ = _ref.$;
+ var metaCache = _ref.metaCache;
+
+ var author = void 0;
+
+ // First, check to see if we have a matching
+ // meta tag that we can make use of.
+ author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
+ if (author && author.length < AUTHOR_MAX_LENGTH) {
+ return cleanAuthor(author);
+ }
+
+ // Second, look through our selectors looking for potential authors.
+ author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
+ if (author && author.length < AUTHOR_MAX_LENGTH) {
+ return cleanAuthor(author);
+ }
+
+ // Last, use our looser regular-expression based selectors for
+ // potential authors.
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ for (var _iterator = BYLINE_SELECTORS_RE[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var _ref4 = _step.value;
+
+ var _ref3 = slicedToArray(_ref4, 2);
+
+ var selector = _ref3[0];
+ var regex = _ref3[1];
+
+ var node = $(selector);
+ if (node.length === 1) {
+ var text = node.text();
+ if (regex.test(text)) {
+ return cleanAuthor(text);
+ }
+ }
+ }
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
+ }
+
+ return null;
+ }
+};
+
+// An ordered list of meta tag names that denote
+// likely date published dates. All attributes
+// should be lowercase for faster case-insensitive matching.
+// From most distinct to least distinct.
+var DATE_PUBLISHED_META_TAGS = ['article:published_time', 'displaydate', 'dc.date', 'dc.date.issued', 'rbpubdate', 'publish_date', 'pub_date', 'pagedate', 'pubdate', 'revision_date', 'doc_date', 'date_created', 'content_create_date', 'lastmodified', 'created', 'date'];
+
+// An ordered list of CSS selectors to find
+// likely date published dates. From most explicit
+// to least explicit.
+var DATE_PUBLISHED_SELECTORS = ['.hentry .dtstamp.published', '.hentry .published', '.hentry .dtstamp.updated', '.hentry .updated', '.single .published', '.meta .published', '.meta .postDate', '.entry-date', '.byline .date', '.postmetadata .date', '.article_datetime', '.date-header', '.story-date', '.dateStamp', '#story .datetime', '.dateline', '.pubdate'];
+
+// An ordered list of compiled regular expressions to find likely date
+// published dates from the URL. These should always have the first
+// capture group be a date string that is parseable by new Date()/moment
+var abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';
+var DATE_PUBLISHED_URL_RES = [
+// /2012/01/27/ but not /2012/01/293
+new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),
+// 20120127 or 20120127T but not 2012012733 or 8201201733
+// /[^0-9](20\d{2}[01]\d[0-3]\d)([^0-9]|$)/i,
+// 2012-01-27
+new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),
+// /2012/jan/27/
+new RegExp('/(20\\d{2}/' + abbrevMonthsStr + '/[0-3]\\d)/', 'i')];
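+
+// Illustrative match (commented-out sketch; the URL is hypothetical):
+//
+//   DATE_PUBLISHED_URL_RES[0].exec('http://example.com/2012/01/27/a-story/')[1];
+//   // => '2012/01/27'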
+
+var GenericDatePublishedExtractor = {
+ extract: function extract(_ref) {
+ var $ = _ref.$;
+ var url = _ref.url;
+ var metaCache = _ref.metaCache;
+
+ var datePublished = void 0;
+ // First, check to see if we have a matching meta tag
+ // that we can make use of.
+ // Don't try cleaning tags from this string
+ datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);
+ if (datePublished) return cleanDatePublished(datePublished);
+
+ // Second, look through our selectors looking for potential
+ // date_published's.
+ datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
+ if (datePublished) return cleanDatePublished(datePublished);
+
+ // Lastly, look to see if a dately string exists in the URL
+ datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
+ if (datePublished) return cleanDatePublished(datePublished);
+
+ return null;
+ }
+};
+
+// import {
+// DEK_META_TAGS,
+// DEK_SELECTORS,
+// DEK_URL_RES,
+// } from './constants';
+
+// import { cleanDek } from 'cleaners';
+
+// import {
+// extractFromMeta,
+// extractFromSelectors,
+// } from 'utils/dom';
+
+// Currently there is only one selector for
+// deks. We should simply return null here
+// until we have a more robust generic option.
+// Below is the original source for this, for reference.
+var GenericDekExtractor = {
+ // extract({ $, content, metaCache }) {
+ extract: function extract() {
+ return null;
+ }
+};
+
+// An ordered list of meta tag names that denote likely article leading images.
+// All attributes should be lowercase for faster case-insensitive matching.
+// From most distinct to least distinct.
+var LEAD_IMAGE_URL_META_TAGS = ['og:image', 'twitter:image', 'image_src'];
+
+var LEAD_IMAGE_URL_SELECTORS = ['link[rel=image_src]'];
+
+var POSITIVE_LEAD_IMAGE_URL_HINTS = ['upload', 'wp-content', 'large', 'photo', 'wp-image'];
+var POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
+
+var NEGATIVE_LEAD_IMAGE_URL_HINTS = ['spacer', 'sprite', 'blank', 'throbber', 'gradient', 'tile', 'bg', 'background', 'icon', 'social', 'header', 'hdr', 'advert', 'spinner', 'loader', 'loading', 'default', 'rating', 'share', 'facebook', 'twitter', 'theme', 'promo', 'ads', 'wp-includes'];
+var NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
+
+var GIF_RE = /\.gif(\?.*)?$/i;
+var JPG_RE = /\.jpe?g(\?.*)?$/i;
+
+function getSig($node) {
+ return ($node.attr('class') || '') + ' ' + ($node.attr('id') || '');
+}
+
+// Scores image urls based on a variety of heuristics.
+function scoreImageUrl(url) {
+ url = url.trim();
+ var score = 0;
+
+ if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
+ score += 20;
+ }
+
+ if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
+ score -= 20;
+ }
+
+ // TODO: We might want to consider removing this as
+ // gifs are much more common/popular than they once were
+ if (GIF_RE.test(url)) {
+ score -= 10;
+ }
+
+ if (JPG_RE.test(url)) {
+ score += 10;
+ }
+
+ // PNGs are neutral.
+
+ return score;
+}
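+
+// Illustrative scores (commented-out sketch; URLs are hypothetical):
+//
+//   scoreImageUrl('http://example.com/wp-content/images/photo-large.jpg');
+//   // => 30 (+20 positive hint 'wp-content', +10 for .jpg)
+//   scoreImageUrl('http://example.com/img/spacer.gif');
+//   // => -30 (-20 negative hint 'spacer', -10 for .gif)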
+
+// Alt attribute usually means non-presentational image.
+function scoreAttr($img) {
+ if ($img.attr('alt')) {
+ return 5;
+ }
+
+ return 0;
+}
+
+// Look through our parent and grandparent for figure-like
+// container elements, give a bonus if we find them
+function scoreByParents($img) {
+ var score = 0;
+ var $figParent = $img.parents('figure').first();
+
+ if ($figParent.length === 1) {
+ score += 25;
+ }
+
+ var $parent = $img.parent();
+ var $gParent = void 0;
+ if ($parent.length === 1) {
+ $gParent = $parent.parent();
+ }
+
+ [$parent, $gParent].forEach(function ($node) {
+ if (PHOTO_HINTS_RE$1.test(getSig($node))) {
+ score += 15;
+ }
+ });
+
+ return score;
+}
+
+// Look at our immediate sibling and see if it looks like it's a
+// caption. Bonus if so.
+function scoreBySibling($img) {
+ var score = 0;
+ var $sibling = $img.next();
+ var sibling = $sibling.get(0);
+
+ if (sibling && sibling.tagName === 'figcaption') {
+ score += 25;
+ }
+
+ if (PHOTO_HINTS_RE$1.test(getSig($sibling))) {
+ score += 15;
+ }
+
+ return score;
+}
+
+function scoreByDimensions($img) {
+ var score = 0;
+
+ var width = parseFloat($img.attr('width'));
+ var height = parseFloat($img.attr('height'));
+ var src = $img.attr('src');
+
+ // Penalty for skinny images
+ if (width && width <= 50) {
+ score -= 50;
+ }
+
+ // Penalty for short images
+ if (height && height <= 50) {
+ score -= 50;
+ }
+
+ if (width && height && !src.includes('sprite')) {
+ var area = width * height;
+ if (area < 5000) {
+ // Smaller than 50 x 100
+ score -= 100;
+ } else {
+ score += Math.round(area / 1000);
+ }
+ }
+
+ return score;
+}
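+
+// Illustrative arithmetic (commented-out sketch; dimensions are hypothetical):
+//
+//   A 640x480 image with src 'photo.jpg' adds Math.round(307200 / 1000) = 307;
+//   a 40x40 image scores -50 (skinny) - 50 (short) - 100 (area < 5000) = -200.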
+
+function scoreByPosition($imgs, index) {
+ return $imgs.length / 2 - index;
+}
+
+// Given a resource, try to find the lead image URL from within
+// it. Like content and next page extraction, uses a scoring system
+// to determine what the most likely image may be. Short circuits
+// on really probable things like og:image meta tags.
+//
+// Potential signals to still take advantage of:
+// * domain
+// * weird aspect ratio
+var GenericLeadImageUrlExtractor = {
+ extract: function extract(_ref) {
+ var $ = _ref.$;
+ var content = _ref.content;
+ var metaCache = _ref.metaCache;
+
+ var cleanUrl = void 0;
+
+ // Check to see if we have a matching meta tag that we can make use of.
+ // This check runs first because common practice is now to specify large
+ // lead images in meta tags for things like Open Graph and Twitter cards.
+ var imageUrl = extractFromMeta($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);
+
+ if (imageUrl) {
+ cleanUrl = clean$1(imageUrl);
+
+ if (cleanUrl) return cleanUrl;
+ }
+
+ // Next, try to find the "best" image via the content.
+ // We'd rather not have to fetch each image and check dimensions,
+ // so try to do some analysis and determine them instead.
+ var imgs = $('img', content).toArray();
+ var imgScores = {};
+
+ imgs.forEach(function (img, index) {
+ var $img = $(img);
+ var src = $img.attr('src');
+
+ if (!src) return;
+
+ var score = scoreImageUrl(src);
+ score += scoreAttr($img);
+ score += scoreByParents($img);
+ score += scoreBySibling($img);
+ score += scoreByDimensions($img);
+ score += scoreByPosition(imgs, index);
+
+ imgScores[src] = score;
+ });
+
+ var _Reflect$ownKeys$redu = Reflect.ownKeys(imgScores).reduce(function (acc, key) {
+ return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
+ }, [null, 0]);
+
+ var _Reflect$ownKeys$redu2 = slicedToArray(_Reflect$ownKeys$redu, 2);
+
+ var topUrl = _Reflect$ownKeys$redu2[0];
+ var topScore = _Reflect$ownKeys$redu2[1];
+
+
+ if (topScore > 0) {
+ cleanUrl = clean$1(topUrl);
+
+ if (cleanUrl) return cleanUrl;
+ }
+
+ // If nothing else worked, check to see if there are any really
+ // probable nodes in the doc, like <link rel="image_src" />.
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ for (var _iterator = LEAD_IMAGE_URL_SELECTORS[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var selector = _step.value;
+
+ var $node = $(selector).first();
+ var src = $node.attr('src');
+ if (src) {
+ cleanUrl = clean$1(src);
+ if (cleanUrl) return cleanUrl;
+ }
+
+ var href = $node.attr('href');
+ if (href) {
+ cleanUrl = clean$1(href);
+ if (cleanUrl) return cleanUrl;
+ }
+
+ var value = $node.attr('value');
+ if (value) {
+ cleanUrl = clean$1(value);
+ if (cleanUrl) return cleanUrl;
+ }
+ }
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
+ }
+
+ return null;
+ }
+};
+
+// def extract(self):
+// """
+// # First, try to find the "best" image via the content.
+// # We'd rather not have to fetch each image and check dimensions,
+// # so try to do some analysis and determine them instead.
+// content = self.extractor.extract_content(return_type="node")
+// imgs = content.xpath('.//img')
+// img_scores = defaultdict(int)
+// logger.debug('Scoring %d images from content', len(imgs))
+// for (i, img) in enumerate(imgs):
+// img_score = 0
+//
+// if not 'src' in img.attrib:
+// logger.debug('No src attribute found')
+// continue
+//
+// try:
+// parsed_img = urlparse(img.attrib['src'])
+// img_path = parsed_img.path.lower()
+// except ValueError:
+// logger.debug('ValueError getting img path.')
+// continue
+// logger.debug('Image path is %s', img_path)
+//
+// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
+// logger.debug('Positive URL hints match. Adding 20.')
+// img_score += 20
+//
+// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
+// logger.debug('Negative URL hints match. Subtracting 20.')
+// img_score -= 20
+//
+// # Gifs are more often structure than photos
+// if img_path.endswith('gif'):
+// logger.debug('gif found. Subtracting 10.')
+// img_score -= 10
+//
+// # JPGs are more often photographs
+// if img_path.endswith('jpg'):
+// logger.debug('jpg found. Adding 10.')
+// img_score += 10
+//
+// # PNGs are neutral.
+//
+// # Alt attribute usually means non-presentational image.
+// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:
+// logger.debug('alt attribute found. Adding 5.')
+// img_score += 5
+//
+// # Look through our parent and grandparent for figure-like
+// # container elements, give a bonus if we find them
+// parents = [img.getparent()]
+// if parents[0] is not None and parents[0].getparent() is not None:
+// parents.append(parents[0].getparent())
+// for p in parents:
+// if p.tag == 'figure':
+// logger.debug('Parent with