diff --git a/.eslintignore b/.eslintignore
index d6550734..78502e5a 100644
--- a/.eslintignore
+++ b/.eslintignore
@@ -1,3 +1,4 @@
**/fixtures/*
dist/*
coverage/*
+karma.conf.js
diff --git a/circle.yml b/circle.yml
index 139ee93f..734f9692 100644
--- a/circle.yml
+++ b/circle.yml
@@ -3,9 +3,6 @@ machine:
pre:
- mkdir ~/.yarn-cache
- timezone:
- America/New_York
-
node:
version:
4.3.2
@@ -14,8 +11,10 @@ machine:
dependencies:
pre:
- curl -o- -L https://yarnpkg.com/install.sh | bash
- - nvm install 6.9.1
- nvm install 7.0.0
+ # For some reason phantomjs-prebuilt is failing w/yarn, but npm installing works
+ - npm install phantomjs-prebuilt
+
cache_directories:
- ~/.yarn-cache
override:
@@ -24,9 +23,11 @@ dependencies:
## Customize test commands
test:
override:
- - nvm use 4.3.2 && yarn build && yarn test -- --maxWorkers=4:
+ # Using 4.3.2 by default
+ - yarn build && yarn test -- --maxWorkers=4:
parallel: true
- - nvm use 6.9 && yarn build && yarn test -- --maxWorkers=4:
+ # Switch to 7 and lint
+ - nvm use 7.0 && yarn lint:ci && yarn build && yarn test -- --maxWorkers=4:
parallel: true
- - nvm use 7.0 && yarn build && yarn test -- --maxWorkers=4:
+ - nvm use 7.0 && yarn test:web -- --maxWorkers=4 && yarn build:web -- --maxWorkers=4:
parallel: true
diff --git a/dist/generate-custom-parser.js b/dist/generate-custom-parser.js
index 8abcb0cf..c3edfeb0 100644
--- a/dist/generate-custom-parser.js
+++ b/dist/generate-custom-parser.js
@@ -3,25 +3,25 @@
function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
var _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));
+var _toConsumableArray = _interopDefault(require('babel-runtime/helpers/toConsumableArray'));
var fs = _interopDefault(require('fs'));
var URL = _interopDefault(require('url'));
var inquirer = _interopDefault(require('inquirer'));
var ora = _interopDefault(require('ora'));
var child_process = require('child_process');
-var _Object$freeze = _interopDefault(require('babel-runtime/core-js/object/freeze'));
+var _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));
+var _defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));
+var _extends = _interopDefault(require('babel-runtime/helpers/extends'));
var _typeof = _interopDefault(require('babel-runtime/helpers/typeof'));
+var _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));
+var _Object$freeze = _interopDefault(require('babel-runtime/core-js/object/freeze'));
var regenerator = _interopDefault(require('babel-runtime/regenerator'));
-var _extends = _interopDefault(require('babel-runtime/helpers/extends'));
var asyncToGenerator = _interopDefault(require('babel-runtime/helpers/asyncToGenerator'));
var cheerio = _interopDefault(require('cheerio'));
var promise = _interopDefault(require('babel-runtime/core-js/promise'));
var request = _interopDefault(require('request'));
-var _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));
var keys = _interopDefault(require('babel-runtime/core-js/object/keys'));
-var toConsumableArray = _interopDefault(require('babel-runtime/helpers/toConsumableArray'));
var stringDirection = _interopDefault(require('string-direction'));
-var _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));
-var defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));
var validUrl = _interopDefault(require('valid-url'));
var moment = _interopDefault(require('moment'));
var wuzzy = _interopDefault(require('wuzzy'));
@@ -30,218 +30,54 @@ var from = _interopDefault(require('babel-runtime/core-js/array/from'));
var ellipsize = _interopDefault(require('ellipsize'));
var _taggedTemplateLiteral = _interopDefault(require('babel-runtime/helpers/taggedTemplateLiteral'));
-function _interopDefault$1(ex){return ex&&(typeof ex==='undefined'?'undefined':_typeof(ex))==='object'&&'default'in ex?ex['default']:ex;}var _regeneratorRuntime=_interopDefault$1(regenerator);var _extends$1=_interopDefault$1(_extends);var _asyncToGenerator=_interopDefault$1(asyncToGenerator);var URL$1=_interopDefault$1(URL);var cheerio$1=_interopDefault$1(cheerio);var _Promise=_interopDefault$1(promise);var request$1=_interopDefault$1(request);var _Reflect$ownKeys$1=_interopDefault$1(_Reflect$ownKeys);var _Object$keys=_interopDefault$1(keys);var _toConsumableArray=_interopDefault$1(toConsumableArray);var _slicedToArray$1=_interopDefault$1(_slicedToArray);var stringDirection$1=_interopDefault$1(stringDirection);var _getIterator$1=_interopDefault$1(_getIterator);var _defineProperty=_interopDefault$1(defineProperty);var _typeof$1=_interopDefault$1(_typeof);var validUrl$1=_interopDefault$1(validUrl);var moment$1=_interopDefault$1(moment);var wuzzy$1=_interopDefault$1(wuzzy);var difflib$1=_interopDefault$1(difflib);var _Array$from=_interopDefault$1(from);var ellipsize$1=_interopDefault$1(ellipsize);var _marked=[range].map(_regeneratorRuntime.mark);function range(){var start=arguments.length>0&&arguments[0]!==undefined?arguments[0]:1;var end=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;return _regeneratorRuntime.wrap(function range$(_context){while(1){switch(_context.prev=_context.next){case 0:if(!(start<=end)){_context.next=5;break;}_context.next=3;return start+=1;case 3:_context.next=0;break;case 5:case"end":return _context.stop();}}},_marked[0],this);}// extremely simple url validation as a first step
-function validateUrl(_ref){var hostname=_ref.hostname;// If this isn't a valid url, return an error message
-return!!hostname;}var Errors={badUrl:{error:true,messages:'The url parameter passed does not look like a valid URL. Please check your data and try again.'}};var REQUEST_HEADERS={'User-Agent':'Readability - http://readability.com/about/'};// The number of milliseconds to attempt to fetch a resource before timing out.
-var FETCH_TIMEOUT=10000;// Content types that we do not extract content from
-var BAD_CONTENT_TYPES=['audio/mpeg','image/gif','image/jpeg','image/jpg'];var BAD_CONTENT_TYPES_RE=new RegExp('^('+BAD_CONTENT_TYPES.join('|')+')$','i');// Use this setting as the maximum size an article can be
-// for us to attempt parsing. Defaults to 5 MB.
-var MAX_CONTENT_LENGTH=5242880;// Turn the global proxy on or off
-// Proxying is not currently enabled in Python source
-// so not implementing logic in port.
-function get(options){return new _Promise(function(resolve,reject){request$1(options,function(err,response,body){if(err){reject(err);}else{resolve({body:body,response:response});}});});}// Evaluate a response to ensure it's something we should be keeping.
-// This does not validate in the sense of a response being 200 level or
-// not. Validation here means that we haven't found reason to bail from
-// further processing of this url.
-function validateResponse(response){var parseNon2xx=arguments.length>1&&arguments[1]!==undefined?arguments[1]:false;// Check if we got a valid status code
-if(response.statusMessage!=='OK'){if(!response.statusCode){throw new Error('Unable to fetch content. Original exception was '+response.error);}else if(!parseNon2xx){throw new Error('Resource returned a response status code of '+response.statusCode+' and resource was instructed to reject non-2xx level status codes.');}}var _response$headers=response.headers,contentType=_response$headers['content-type'],contentLength=_response$headers['content-length'];// Check that the content is not in BAD_CONTENT_TYPES
-if(BAD_CONTENT_TYPES_RE.test(contentType)){throw new Error('Content-type for this resource was '+contentType+' and is not allowed.');}// Check that the content length is below maximum
-if(contentLength>MAX_CONTENT_LENGTH){throw new Error('Content for this resource was too large. Maximum content length is '+MAX_CONTENT_LENGTH+'.');}return true;}// Grabs the last two pieces of the URL and joins them back together
-// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'
-// Set our response attribute to the result of fetching our URL.
-// TODO: This should gracefully handle timeouts and raise the
-// proper exceptions on the many failure cases of HTTP.
-// TODO: Ensure we are not fetching something enormous. Always return
-// unicode content for HTML, with charset conversion.
-var fetchResource$1=function(){var _ref2=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(url,parsedUrl){var options,_ref3,response,body;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:parsedUrl=parsedUrl||URL$1.parse(encodeURI(url));options={url:parsedUrl,headers:_extends$1({},REQUEST_HEADERS),timeout:FETCH_TIMEOUT,// Don't set encoding; fixes issues
-// w/gzipped responses
-encoding:null,// Accept cookies
-jar:true,// Accept and decode gzip
-gzip:true,// Follow any redirect
-followAllRedirects:true};_context.next=4;return get(options);case 4:_ref3=_context.sent;response=_ref3.response;body=_ref3.body;_context.prev=7;validateResponse(response);return _context.abrupt('return',{body:body,response:response});case 12:_context.prev=12;_context.t0=_context['catch'](7);return _context.abrupt('return',Errors.badUrl);case 15:case'end':return _context.stop();}}},_callee,this,[[7,12]]);}));function fetchResource(_x2,_x3){return _ref2.apply(this,arguments);}return fetchResource;}();function convertMetaProp($,from$$1,to){$('meta['+from$$1+']').each(function(_,node){var $node=$(node);var value=$node.attr(from$$1);$node.attr(to,value);$node.removeAttr(from$$1);});return $;}// For ease of use in extracting from meta tags,
-// replace the "content" attribute on meta tags with the
-// "value" attribute.
-//
-// In addition, normalize 'property' attributes to 'name' for ease of
-// querying later. See, e.g., og or twitter meta tags.
-function normalizeMetaTags($){$=convertMetaProp($,'content','value');$=convertMetaProp($,'property','name');return $;}var IS_LINK=new RegExp('https?://','i');var IS_IMAGE=new RegExp('.(png|gif|jpe?g)','i');var TAGS_TO_REMOVE=['script','style','form'].join(',');// Convert all instances of images with potentially
-// lazy loaded images into normal images.
-// Many sites will have img tags with no source, or an image tag with a src
-// attribute that a is a placeholer. We need to be able to properly fill in
-// the src attribute so the images are no longer lazy loaded.
-function convertLazyLoadedImages($){$('img').each(function(_,img){_Reflect$ownKeys$1(img.attribs).forEach(function(attr){var value=img.attribs[attr];if(attr!=='src'&&IS_LINK.test(value)&&IS_IMAGE.test(value)){$(img).attr('src',value);}});});return $;}function isComment(index,node){return node.type==='comment';}function cleanComments($){$.root().find('*').contents().filter(isComment).remove();return $;}function clean($){$(TAGS_TO_REMOVE).remove();$=cleanComments($);return $;}var Resource={// Create a Resource.
-//
-// :param url: The URL for the document we should retrieve.
-// :param response: If set, use as the response rather than
-// attempting to fetch it ourselves. Expects a
-// string.
-create:function create(url,preparedResponse,parsedUrl){var _this=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var result,validResponse;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:result=void 0;if(!preparedResponse){_context.next=6;break;}validResponse={statusMessage:'OK',statusCode:200,headers:{'content-type':'text/html','content-length':500}};result={body:preparedResponse,response:validResponse};_context.next=9;break;case 6:_context.next=8;return fetchResource$1(url,parsedUrl);case 8:result=_context.sent;case 9:if(!result.error){_context.next=11;break;}return _context.abrupt('return',result);case 11:return _context.abrupt('return',_this.generateDoc(result));case 12:case'end':return _context.stop();}}},_callee,_this);}))();},generateDoc:function generateDoc(_ref){var content=_ref.body,response=_ref.response;var contentType=response.headers['content-type'];// TODO: Implement is_text function from
-// https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
-if(!contentType.includes('html')&&!contentType.includes('text')){throw new Error('Content does not appear to be text.');}var $=cheerio$1.load(content,{normalizeWhitespace:true});if($.root().children().length===0){throw new Error('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return $;}};var merge=function merge(extractor,domains){return domains.reduce(function(acc,domain){acc[domain]=extractor;return acc;},{});};function mergeSupportedDomains(extractor){return extractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}var BloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
-// initially in the page, but it's all there
-// in noscript
-selectors:['.post-content noscript'],// Selectors to remove from the extracted content
-clean:[],// Convert the noscript tag to a div
-transforms:{noscript:'div'}},author:{selectors:['.post-author-name']},title:{selectors:['.post h2.title']},date_published:{selectors:['span.publishdate']}};var NYMagExtractor={domain:'nymag.com',content:{// Order by most likely. Extractor will stop on first occurrence
-selectors:['div.article-content','section.body','article.article'],// Selectors to remove from the extracted content
-clean:['.ad','.single-related-story'],// Object of tranformations to make on matched elements
-// Each key is the selector, each value is the tag to
-// transform to.
-// If a function is given, it should return a string
-// to convert to or nothing (in which case it will not perform
-// the transformation.
-transforms:{// Convert h1s to h2s
-h1:'h2',// Convert lazy-loaded noscript images to figures
-noscript:function noscript($node){var $children=$node.children();if($children.length===1&&$children.get(0).tagName==='img'){return'figure';}return null;}}},title:{selectors:['h1.lede-feature-title','h1.headline-primary','h1']},author:{selectors:['.by-authors','.lede-feature-author']},dek:{selectors:['.lede-feature-teaser']},date_published:{selectors:[['time.article-timestamp[datetime]','datetime'],'time.article-timestamp']}};var WikipediaExtractor={domain:'wikipedia.org',content:{selectors:['#mw-content-text'],defaultCleaner:false,// transform top infobox to an image with caption
-transforms:{'.infobox img':function infoboxImg($node){var $parent=$node.parents('.infobox');// Only prepend the first image in .infobox
-if($parent.children('img').length===0){$parent.prepend($node);}},'.infobox caption':'figcaption','.infobox':'figure'},// Selectors to remove from the extracted content
-clean:['.mw-editsection','figure tr, figure td, figure tbody','#toc','.navbox']},author:'Wikipedia Contributors',title:{selectors:['h2.title']},date_published:{selectors:['#footer-info-lastmod']}};var TwitterExtractor={domain:'twitter.com',content:{transforms:{// We're transforming essentially the whole page here.
-// Twitter doesn't have nice selectors, so our initial
-// selector grabs the whole page, then we're re-writing
-// it to fit our needs before we clean it up.
-'.permalink[role=main]':function permalinkRoleMain($node,$){var tweets=$node.find('.tweet');var $tweetContainer=$('
');$tweetContainer.append(tweets);$node.replaceWith($tweetContainer);},// Twitter wraps @ with s, which
-// renders as a strikethrough
-s:'span'},selectors:['.permalink[role=main]'],defaultCleaner:false,clean:['.stream-item-footer','button','.tweet-details-fixer']},author:{selectors:['.tweet.permalink-tweet .username']},date_published:{selectors:[['.permalink-tweet ._timestamp[data-time-ms]','data-time-ms']]}};var NYTimesExtractor={domain:'www.nytimes.com',title:{selectors:['.g-headline','h1.headline']},author:{selectors:[['meta[name="author"]','value'],'.g-byline','.byline']},content:{selectors:['div.g-blocks','article#story'],defaultCleaner:false,transforms:{'img.g-lazy':function imgGLazy($node){var src=$node.attr('src');// const widths = $node.attr('data-widths')
-// .slice(1)
-// .slice(0, -1)
-// .split(',');
-// if (widths.length) {
-// width = widths.slice(-1);
-// } else {
-// width = '900';
-// }
-var width=640;src=src.replace('{{size}}',width);$node.attr('src',src);}},clean:['.ad','header#story-header','.story-body-1 .lede.video','.visually-hidden','#newsletter-promo','.promo','.comments-button','.hidden']},date_published:null,lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-var TheAtlanticExtractor={domain:'www.theatlantic.com',title:{selectors:['h1.hed']},author:{selectors:['article#article .article-cover-extra .metadata .byline a']},content:{selectors:['.article-body'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:null,lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var NewYorkerExtractor={domain:'www.newyorker.com',title:{selectors:['h1.title']},author:{selectors:['.contributors']},content:{selectors:['div#articleBody','div.articleBody'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var WiredExtractor={domain:'www.wired.com',title:{selectors:['h1.post-title']},author:{selectors:['a[rel="author"]']},content:{selectors:['article.content'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:['.visually-hidden']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var MSNExtractor={domain:'www.msn.com',title:{selectors:['h1']},author:{selectors:['span.authorname-txt']},content:{selectors:['div.richtext'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:['span.caption']},date_published:{selectors:['span.time']},lead_image_url:{selectors:[]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var YahooExtractor={domain:'www.yahoo.com',title:{selectors:['header.canvas-header']},author:{selectors:['span.provider-name']},content:{selectors:[// enter content selectors
-'.content-canvas'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:['.figure-caption']},date_published:{selectors:[['time.date[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var BuzzfeedExtractor={domain:'www.buzzfeed.com',title:{selectors:['h1[id="post-title"]']},author:{selectors:['a[data-action="user/username"]','byline__author']},content:{selectors:['#buzz_sub_buzz'],defaultCleaner:false,// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:{h2:'b'},// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:['.instapaper_ignore','.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline','.share-box']},date_published:{selectors:['.buzz-datetime']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var WikiaExtractor={domain:'fandom.wikia.com',title:{selectors:['h1.entry-title']},author:{selectors:['.author vcard','.fn']},content:{selectors:['.grid-content','.entry-content'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var LittleThingsExtractor={domain:'www.littlethings.com',title:{selectors:['h1.post-title']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:[// enter content selectors
-'.mainContentIntro','.content-wrapper'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var PoliticoExtractor={domain:'www.politico.com',title:{selectors:[// enter title selectors
-['meta[name="og:title"]','value']]},author:{selectors:['.story-main-content .byline .vcard']},content:{selectors:[// enter content selectors
-'.story-main-content','.content-group','.story-core','.story-text'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:[],// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:['figcaption']},date_published:{selectors:[['.story-main-content .timestamp time[datetime]','datetime']]},lead_image_url:{selectors:[// enter lead_image_url selectors
-['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};var DeadspinExtractor={domain:'deadspin.com',supportedDomains:['jezebel.com','lifehacker.com','kotaku.com','gizmodo.com','jalopnik.com','kinja.com'],title:{selectors:['h1.headline']},author:{selectors:['.author']},content:{selectors:['.post-content','.entry-content'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:{'iframe.lazyload[data-recommend-id^="youtube://"]':function iframeLazyloadDataRecommendIdYoutube($node){var youtubeId=$node.attr('id').split('youtube-')[1];$node.attr('src','https://www.youtube.com/embed/'+youtubeId);}},// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['time.updated[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
-]},next_page_url:{selectors:[// enter selectors
-]},excerpt:{selectors:[// enter selectors
-]}};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var BroadwayWorldExtractor={domain:'www.broadwayworld.com',title:{selectors:['h1.article-title']},author:{selectors:['span[itemprop=author]']},content:{selectors:['div[itemprop=articlebody]'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:{},// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['meta[itemprop=datePublished]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:{selectors:[// enter selectors
-]},excerpt:{selectors:[// enter selectors
-]}};// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var ApartmentTherapyExtractor={domain:'www.apartmenttherapy.com',title:{selectors:['h1.headline']},author:{selectors:['.PostByline__name']},content:{selectors:['div.post__content'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:{'div[data-render-react-id="images/LazyPicture"]':function divDataRenderReactIdImagesLazyPicture($node,$){var data=JSON.parse($node.attr('data-props'));var src=data.sources[0].src;var $img=$('').attr('src',src);$node.replaceWith($img);}},// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['.PostByline__timestamp[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name=description]','value']]},next_page_url:{selectors:[// enter selectors
-]},excerpt:{selectors:[// enter selectors
-]}};var MediumExtractor={domain:'medium.com',supportedDomains:['trackchanges.postlight.com'],title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:['.section-content'],// Is there anything in the content you selected that needs transformed
-// before it's consumable content? E.g., unusual lazy loaded images
-transforms:{// Re-write lazy-loaded youtube videos
-iframe:function iframe($node){var ytRe=/https:\/\/i.embed.ly\/.+url=https:\/\/i\.ytimg\.com\/vi\/(\w+)\//;var thumb=decodeURIComponent($node.attr('data-thumbnail'));if(ytRe.test(thumb)){var _thumb$match=thumb.match(ytRe),_thumb$match2=_slicedToArray$1(_thumb$match,2),_=_thumb$match2[0],youtubeId=_thumb$match2[1];// eslint-disable-line
-$node.attr('src','https://www.youtube.com/embed/'+youtubeId);var $parent=$node.parents('figure');$parent.prepend($node.clone());$node.remove();}}},// Is there anything that is in the result that shouldn't be?
-// The clean selectors will remove anything that matches from
-// the result
-clean:[]},date_published:{selectors:[['time[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
-]},next_page_url:{selectors:[// enter selectors
-]},excerpt:{selectors:[// enter selectors
-]}};var CustomExtractors=_Object$freeze({BloggerExtractor:BloggerExtractor,NYMagExtractor:NYMagExtractor,WikipediaExtractor:WikipediaExtractor,TwitterExtractor:TwitterExtractor,NYTimesExtractor:NYTimesExtractor,TheAtlanticExtractor:TheAtlanticExtractor,NewYorkerExtractor:NewYorkerExtractor,WiredExtractor:WiredExtractor,MSNExtractor:MSNExtractor,YahooExtractor:YahooExtractor,BuzzfeedExtractor:BuzzfeedExtractor,WikiaExtractor:WikiaExtractor,LittleThingsExtractor:LittleThingsExtractor,PoliticoExtractor:PoliticoExtractor,DeadspinExtractor:DeadspinExtractor,BroadwayWorldExtractor:BroadwayWorldExtractor,ApartmentTherapyExtractor:ApartmentTherapyExtractor,MediumExtractor:MediumExtractor});var Extractors=_Object$keys(CustomExtractors).reduce(function(acc,key){var extractor=CustomExtractors[key];return _extends$1({},acc,mergeSupportedDomains(extractor));},{});// Spacer images to be removed
-var SPACER_RE=new RegExp('trans|transparent|spacer|blank','i');// The class we will use to mark elements we want to keep
+// Spacer images to be removed
+var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
+
+// The class we will use to mark elements we want to keep
// but would normally remove
-var KEEP_CLASS='mercury-parser-keep';var KEEP_SELECTORS=['iframe[src^="https://www.youtube.com"]','iframe[src^="http://www.youtube.com"]','iframe[src^="https://player.vimeo"]','iframe[src^="http://player.vimeo"]'];// A list of tags to strip from the output if we encounter them.
-var STRIP_OUTPUT_TAGS=['title','script','noscript','link','style','hr','embed','iframe','object'];// cleanAttributes
-var REMOVE_ATTRS=['style','align'];var REMOVE_ATTR_SELECTORS=REMOVE_ATTRS.map(function(selector){return'['+selector+']';});var REMOVE_ATTR_LIST=REMOVE_ATTRS.join(',');var WHITELIST_ATTRS=['src','srcset','href','class','id','alt'];var WHITELIST_ATTRS_RE=new RegExp('^('+WHITELIST_ATTRS.join('|')+')$','i');// removeEmpty
-var REMOVE_EMPTY_TAGS=['p'];var REMOVE_EMPTY_SELECTORS=REMOVE_EMPTY_TAGS.map(function(tag){return tag+':empty';}).join(',');// cleanTags
-var CLEAN_CONDITIONALLY_TAGS=['ul','ol','table','div','button','form'].join(',');// cleanHeaders
-var HEADER_TAGS=['h2','h3','h4','h5','h6'];var HEADER_TAG_LIST=HEADER_TAGS.join(',');// // CONTENT FETCHING CONSTANTS ////
+var KEEP_CLASS = 'mercury-parser-keep';
+
+var KEEP_SELECTORS = ['iframe[src^="https://www.youtube.com"]', 'iframe[src^="http://www.youtube.com"]', 'iframe[src^="https://player.vimeo"]', 'iframe[src^="http://player.vimeo"]'];
+
+// A list of tags to strip from the output if we encounter them.
+var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
+
+// cleanAttributes
+var REMOVE_ATTRS = ['style', 'align'];
+var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
+ return '[' + selector + ']';
+});
+var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
+var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
+var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
+
+// removeEmpty
+var REMOVE_EMPTY_TAGS = ['p'];
+var REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {
+ return tag + ':empty';
+}).join(',');
+
+// cleanTags
+var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
+
+// cleanHeaders
+var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
+var HEADER_TAG_LIST = HEADER_TAGS.join(',');
+
+// // CONTENT FETCHING CONSTANTS ////
+
// A list of strings that can be considered unlikely candidates when
// extracting content from a resource. These strings are joined together
// and then tested for existence using re:test, so may contain simple,
// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST=['ad-break','adbox','advert','addthis','agegate','aux','blogger-labels','combx','comment','conversation','disqus','entry-unrelated','extra','foot',// 'form', // This is too generic, has too many false positives
-'header','hidden','loader','login',// Note: This can hit 'blogindex'.
-'menu','meta','nav','outbrain','pager','pagination','predicta',// readwriteweb inline ad box
-'presence_control_external',// lifehacker.com container full of false positives
-'popup','printfriendly','related','remove','remark','rss','share','shoutbox','sidebar','sociable','sponsor','taboola','tools'];// A list of strings that can be considered LIKELY candidates when
+var UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
+// 'form', // This is too generic, has too many false positives
+'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
+
+// A list of strings that can be considered LIKELY candidates when
// extracting content from a resource. Essentially, the inverse of the
// blacklist above - if something matches both blacklist and whitelist,
// it is kept. This is useful, for example, if something has a className
@@ -252,71 +88,126 @@ var UNLIKELY_CANDIDATES_BLACKLIST=['ad-break','adbox','advert','addthis','agegat
// These strings are joined together and then tested for existence using
// re:test, so may contain simple, non-pipe style regular expression queries
// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST=['and','article','body','blogindex','column','content','entry-content-asset','format',// misuse of form
-'hfeed','hentry','hatom','main','page','posts','shadow'];// A list of tags which, if found inside, should cause a to NOT
+var UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+
+// A list of tags which, if found inside, should cause a <div /> to NOT
// be turned into a paragraph tag. Shallow div tags without these elements
// should be turned into <p /> tags.
-var DIV_TO_P_BLOCK_TAGS=['a','blockquote','dl','div','img','p','pre','table'].join(',');// A list of tags that should be ignored when trying to find the top candidate
+var DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+
+// A list of tags that should be ignored when trying to find the top candidate
// for a document.
+
+
+
+
// A list of selectors that specify, very clearly, either hNews or other
// very content-specific style content, like Blogger templates.
// More examples here: http://microformats.org/wiki/blog-post-formats
+
+
+
+
+
// A list of strings that denote a positive scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS=['article','articlecontent','instapaper_body','blog','body','content','entry-content-asset','entry','hentry','main','Normal','page','pagination','permalink','post','story','text','[-_]copy',// usatoday
-'\\Bcopy'];// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE=new RegExp(POSITIVE_SCORE_HINTS.join('|'),'i');// Readability publisher-specific guidelines
+
+
+// The above list, joined into a matching regular expression
+
+
+// Readability publisher-specific guidelines
+
+
// A list of strings that denote a negative scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS=['adbox','advert','author','bio','bookmark','bottom','byline','clear','com-','combx','comment','comment\\B','contact','copy','credit','crumb','date','deck','excerpt','featured',// tnr.com has a featured_content which throws us off
-'foot','footer','footnote','graf','head','info','infotext',// newscientist.com copyright
-'instapaper_ignore','jump','linebreak','link','masthead','media','meta','modal','outbrain',// slate.com junk
-'promo','pr_',// autoblog - press release
-'related','respond','roundcontent',// lifehacker restricted content warning
-'scroll','secondary','share','shopping','shoutbox','side','sidebar','sponsor','stamp','sub','summary','tags','tools','widget'];// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE=new RegExp(NEGATIVE_SCORE_HINTS.join('|'),'i');// XPath to try to determine if a page is wordpress. Not always successful.
-var IS_WP_SELECTOR='meta[name=generator][value^=WordPress]';// Match a digit. Pretty clear.
-// A list of words that, if found in link text or URLs, likely mean that
+
+// The above list, joined into a matching regular expression
+
+
+// CSS selector to try to determine if a page is wordpress. Not always successful.
+var IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';
+
+// Match a digit. Pretty clear.
+
+
+// A list of words that, if found in link text or URLs, likely mean that
// this link is not a next page link.
+
+
+
// Match any phrase that looks like it could be page, or paging, or pagination
-var PAGE_RE=new RegExp('pag(e|ing|inat)','i');// Match any link text/classname/id that looks like it could mean the next
+
+
+// Match any link text/classname/id that looks like it could mean the next
// page. Things like: next, continue, >, >>, » but not >|, »| as those can
// mean last page.
// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
+
+
// Match any link text/classname/id that looks like it is an end link: things
// like "first", "last", "end", etc.
+
+
// Match any link text/classname/id that looks like it means the previous
// page.
+
+
// Match 2 or more consecutive <br> tags
+
+
// Match 1 BR tag.
+
+
// A list of all of the block level tags known in HTML5 and below. Taken from
// http://bit.ly/qneNIT
-var BLOCK_LEVEL_TAGS=['article','aside','blockquote','body','br','button','canvas','caption','col','colgroup','dd','div','dl','dt','embed','fieldset','figcaption','figure','footer','form','h1','h2','h3','h4','h5','h6','header','hgroup','hr','li','map','object','ol','output','p','pre','progress','section','table','tbody','textarea','tfoot','th','thead','tr','ul','video'];var BLOCK_LEVEL_TAGS_RE=new RegExp('^('+BLOCK_LEVEL_TAGS.join('|')+')$','i');// The removal is implemented as a blacklist and whitelist, this test finds
+var BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
+var BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');
+
+// The removal is implemented as a blacklist and whitelist, this test finds
// blacklisted elements that aren't whitelisted. We do this all in one
// expression-both because it's only one pass, and because this skips the
// serialization for whitelisted nodes.
-var candidatesBlacklist=UNLIKELY_CANDIDATES_BLACKLIST.join('|');var CANDIDATES_BLACKLIST=new RegExp(candidatesBlacklist,'i');var candidatesWhitelist=UNLIKELY_CANDIDATES_WHITELIST.join('|');var CANDIDATES_WHITELIST=new RegExp(candidatesWhitelist,'i');function stripUnlikelyCandidates($){// Loop through the provided document and remove any non-link nodes
-// that are unlikely candidates for article content.
-//
-// Links are ignored because there are very often links to content
-// that are identified as non-body-content, but may be inside
-// article-like content.
-//
-// :param $: a cheerio object to strip nodes from
-// :return $: the cleaned cheerio object
-$('*').not('a').each(function(index,node){var $node=$(node);var classes=$node.attr('class');var id=$node.attr('id');if(!id&&!classes)return;var classAndId=(classes||'')+' '+(id||'');if(CANDIDATES_WHITELIST.test(classAndId)){return;}else if(CANDIDATES_BLACKLIST.test(classAndId)){$node.remove();}});return $;}// ## NOTES:
+var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
+var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
+
+var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
+var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
+
+// ## NOTES:
// Another good candidate for refactoring/optimizing.
// Very imperative code, I don't love it. - AP
+
// Given cheerio object, convert consecutive <br /> tags into
// <p /> tags instead.
//
// :param $: A cheerio object
-function brsToPs$$1($){var collapsing=false;$('br').each(function(index,element){var nextElement=$(element).next().get(0);if(nextElement&&nextElement.tagName==='br'){collapsing=true;$(element).remove();}else if(collapsing){collapsing=false;// $(element).replaceWith('')
-paragraphize(element,$,true);}});return $;}// Given a node, turn it into a P if it is not already a P, and
+
+function brsToPs$$1($) {
+  // True while we are walking through a run of back-to-back <br> elements.
+  var insideBrRun = false;
+
+  $('br').each(function (index, element) {
+    var $br = $(element);
+    var following = $br.next().get(0);
+    var followedByBr = Boolean(following) && following.tagName.toLowerCase() === 'br';
+
+    if (followedByBr) {
+      // Not the last <br> of the run yet: drop it and keep collapsing.
+      insideBrRun = true;
+      $br.remove();
+      return;
+    }
+
+    if (insideBrRun) {
+      // Last <br> of a run: hand it to paragraphize in br mode.
+      insideBrRun = false;
+      paragraphize(element, $, true);
+    }
+  });
+
+  return $;
+}
+
+// Given a node, turn it into a P if it is not already a P, and
// make sure it conforms to the constraints of a P tag (I.E. does
// not contain any other block tags.)
//
@@ -326,51 +217,158 @@ paragraphize(element,$,true);}});return $;}// Given a node, turn it into a P if
// :param node: The node to paragraphize; this is a raw node
// :param $: The cheerio object to handle dom manipulation
// :param br: Whether or not the passed node is a br
-function paragraphize(node,$){var br=arguments.length>2&&arguments[2]!==undefined?arguments[2]:false;var $node=$(node);if(br){var sibling=node.nextSibling;var p=$('');// while the next node is text or not a block level element
-// append it to a new p node
-while(sibling&&!(sibling.tagName&&BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))){var nextSibling=sibling.nextSibling;$(sibling).appendTo(p);sibling=nextSibling;}$node.replaceWith(p);$node.remove();return $;}return $;}function convertDivs($){$('div').each(function(index,div){var $div=$(div);var convertable=$div.children(DIV_TO_P_BLOCK_TAGS).length===0;if(convertable){convertNodeTo($div,$,'p');}});return $;}function convertSpans($){$('span').each(function(index,span){var $span=$(span);var convertable=$span.parents('p, div').length===0;if(convertable){convertNodeTo($span,$,'p');}});return $;}// Loop through the provided doc, and convert any p-like elements to
-// actual paragraph tags.
-//
-// Things fitting this criteria:
-// * Multiple consecutive tags.
-// * tags without block level elements inside of them
-// * tags who are not children of or tags.
-//
-// :param $: A cheerio object to search
-// :return cheerio object with new p elements
-// (By-reference mutation, though. Returned just for convenience.)
-function convertToParagraphs$$1($){$=brsToPs$$1($);$=convertDivs($);$=convertSpans($);return $;}function convertNodeTo($node,$){var tag=arguments.length>2&&arguments[2]!==undefined?arguments[2]:'p';var node=$node.get(0);if(!node){return $;}var _$node$get=$node.get(0),attribs=_$node$get.attribs;var attribString=_Reflect$ownKeys$1(attribs).map(function(key){return key+'='+attribs[key];}).join(' ');$node.replaceWith('<'+tag+' '+attribString+'>'+$node.contents()+''+tag+'>');return $;}function cleanForHeight($img,$){var height=parseInt($img.attr('height'),10);var width=parseInt($img.attr('width'),10)||20;// Remove images that explicitly have very small heights or
-// widths, because they are most likely shims or icons,
-// which aren't very useful for reading.
-if((height||20)<10||width<10){$img.remove();}else if(height){// Don't ever specify a height on images, so that we can
-// scale with respect to width without screwing up the
-// aspect ratio.
-$img.removeAttr('height');}return $;}// Cleans out images where the source string matches transparent/spacer/etc
+
+// Replace a <br> node with a new paragraph holding the inline content
+// that follows it.
+//
+// :param node: raw DOM node (read for nextSibling, wrapped with $)
+// :param $: the cheerio/jQuery-style function used for DOM manipulation
+// :param br: optional third argument, defaults to false; nothing is done
+//            unless it is truthy
+// :return $, always
+function paragraphize(node, $) {
+  var br = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
+
+  // Only the br case is implemented; anything else is returned untouched.
+  if (!br) {
+    return $;
+  }
+
+  var $node = $(node);
+  var sibling = node.nextSibling;
+  // A real, empty paragraph element. An empty-string selector would yield
+  // an empty selection and the collected siblings would have nowhere to go.
+  var p = $('<p></p>');
+
+  // while the next node is text or not a block level element
+  // append it to the new p node
+  while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {
+    // Capture the successor first: appendTo moves `sibling` out of place.
+    var nextSibling = sibling.nextSibling;
+    $(sibling).appendTo(p);
+    sibling = nextSibling;
+  }
+
+  $node.replaceWith(p);
+  $node.remove();
+  return $;
+}
+
+// Turn every <div> that has no direct child matching DIV_TO_P_BLOCK_TAGS
+// into a <p>. Returns $ for convenience.
+function convertDivs($) {
+  $('div').each(function (index, div) {
+    var $candidate = $(div);
+    var blockChildren = $candidate.children(DIV_TO_P_BLOCK_TAGS);
+
+    if (blockChildren.length !== 0) {
+      return;
+    }
+
+    convertNodeTo$$1($candidate, $, 'p');
+  });
+
+  return $;
+}
+
+// Turn every <span> that has no <p> or <div> ancestor into a <p>.
+// Returns $ for convenience.
+function convertSpans($) {
+  $('span').each(function (index, span) {
+    var $candidate = $(span);
+    var wrappingBlocks = $candidate.parents('p, div');
+
+    if (wrappingBlocks.length !== 0) {
+      return;
+    }
+
+    convertNodeTo$$1($candidate, $, 'p');
+  });
+
+  return $;
+}
+
+// Replace $node with a freshly built element of type `tag`, carrying over
+// the node's attributes and inner content.
+//
+// :param $node: wrapped selection; only its first element is converted
+// :param $: the cheerio/jQuery-style function ($.browser selects the path)
+// :param tag: optional third argument, the replacement tag name ('p' default)
+// :return $, always (also when $node is empty)
+function convertNodeTo$$1($node, $) {
+  var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
+
+  var node = $node.get(0);
+  if (!node) {
+    return $;
+  }
+  var attrs = getAttrs(node) || {};
+
+  // NOTE(review): values are emitted unquoted, so a value containing
+  // whitespace will not survive as a single attribute - confirm intent.
+  var attribString = _Reflect$ownKeys(attrs).map(function (key) {
+    return key + '=' + attrs[key];
+  }).join(' ');
+  var html = void 0;
+
+  if ($.browser) {
+    // In the browser, the contents of noscript tags aren't rendered, therefore
+    // transforms on the noscript tag (commonly used for lazy-loading) don't work
+    // as expected. This test case handles that
+    html = node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();
+  } else {
+    html = $node.contents();
+  }
+  // The closing tag needs its '</' prefix, otherwise the element is never closed.
+  $node.replaceWith('<' + tag + ' ' + attribString + '>' + html + '</' + tag + '>');
+  return $;
+}
+
+// Drop images whose declared height or width is below 10, since those are
+// most likely shims or icons. For images that stay, strip any height
+// attribute so they can scale by width without distorting the aspect ratio.
+// Returns $.
+function cleanForHeight($img, $) {
+  var declaredHeight = parseInt($img.attr('height'), 10);
+  // A falsy parse result (NaN or 0) counts as 20, so only explicitly tiny
+  // values trip the threshold.
+  var effectiveHeight = declaredHeight || 20;
+  var effectiveWidth = parseInt($img.attr('width'), 10) || 20;
+  var isTiny = effectiveHeight < 10 || effectiveWidth < 10;
+
+  if (isTiny) {
+    $img.remove();
+    return $;
+  }
+
+  if (declaredHeight) {
+    $img.removeAttr('height');
+  }
+
+  return $;
+}
+
+// Cleans out images where the source string matches transparent/spacer/etc
// TODO This seems very aggressive - AP
-function removeSpacers($img,$){if(SPACER_RE.test($img.attr('src'))){$img.remove();}return $;}function cleanImages($article,$){$article.find('img').each(function(index,img){var $img=$(img);cleanForHeight($img,$);removeSpacers($img,$);});return $;}function markToKeep(article,$,url){var tags=arguments.length>3&&arguments[3]!==undefined?arguments[3]:[];if(tags.length===0){tags=KEEP_SELECTORS;}if(url){var _URL$parse=URL$1.parse(url),protocol=_URL$parse.protocol,hostname=_URL$parse.hostname;tags=[].concat(_toConsumableArray(tags),['iframe[src^="'+protocol+'//'+hostname+'"]']);}$(tags.join(','),article).addClass(KEEP_CLASS);return $;}function stripJunkTags(article,$){var tags=arguments.length>2&&arguments[2]!==undefined?arguments[2]:[];if(tags.length===0){tags=STRIP_OUTPUT_TAGS;}// Remove matching elements, but ignore
-// any element with a class of mercury-parser-keep
-$(tags.join(','),article).not('.'+KEEP_CLASS).remove();// Remove the mercury-parser-keep class from result
-$('.'+KEEP_CLASS,article).removeClass(KEEP_CLASS);return $;}// H1 tags are typically the article title, which should be extracted
-// by the title extractor instead. If there's less than 3 of them (<3),
-// strip them. Otherwise, turn 'em into H2s.
-function cleanHOnes$$1(article,$){var $hOnes=$('h1',article);if($hOnes.length<3){$hOnes.each(function(index,node){return $(node).remove();});}else{$hOnes.each(function(index,node){convertNodeTo($(node),$,'h2');});}return $;}function removeAllButWhitelist($article){$article.find('*').each(function(index,node){node.attribs=_Reflect$ownKeys$1(node.attribs).reduce(function(acc,attr){if(WHITELIST_ATTRS_RE.test(attr)){return _extends$1({},acc,_defineProperty({},attr,node.attribs[attr]));}return acc;},{});});return $article;}// function removeAttrs(article, $) {
-// REMOVE_ATTRS.forEach((attr) => {
-// $(`[${attr}]`, article).removeAttr(attr);
-// });
-// }
-// Remove attributes like style or align
-function cleanAttributes($article){// Grabbing the parent because at this point
-// $article will be wrapped in a div which will
-// have a score set on it.
-return removeAllButWhitelist($article.parent().length?$article.parent():$article);}function removeEmpty($article,$){$article.find('p').each(function(index,p){var $p=$(p);if($p.find('iframe, img').length===0&&$p.text().trim()==='')$p.remove();});return $;}// // CONTENT FETCHING CONSTANTS ////
+function removeSpacers($img, $) {
+  // Remove the image when its src matches SPACER_RE; always hand back $.
+  var source = $img.attr('src');
+  var looksLikeSpacer = SPACER_RE.test(source);
+
+  if (looksLikeSpacer) {
+    $img.remove();
+  }
+
+  return $;
+}
+
+// Remove unwanted tags from `article`, sparing anything marked with
+// KEEP_CLASS, then clear that marker class from the article.
+//
+// :param article: context in which selectors are evaluated
+// :param $: the cheerio/jQuery-style function
+// :param tags: optional third argument; when absent or empty,
+//              STRIP_OUTPUT_TAGS is used
+// :return $
+function stripJunkTags(article, $) {
+  var requested = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];
+  var tagList = requested.length === 0 ? STRIP_OUTPUT_TAGS : requested;
+  var keepSelector = '.' + KEEP_CLASS;
+
+  $(tagList.join(','), article).not(keepSelector).remove();
+  $(keepSelector, article).removeClass(KEEP_CLASS);
+
+  return $;
+}
+
+// For every descendant of $article, replace its attribute set with only
+// those attributes whose names match WHITELIST_ATTRS_RE. Returns $article.
+function removeAllButWhitelist($article) {
+  $article.find('*').each(function (index, node) {
+    var current = getAttrs(node);
+    var kept = {};
+
+    _Reflect$ownKeys(current).forEach(function (name) {
+      if (WHITELIST_ATTRS_RE.test(name)) {
+        kept[name] = current[name];
+      }
+    });
+
+    setAttrs(node, kept);
+  });
+
+  return $article;
+}
+
+// // CONTENT FETCHING CONSTANTS ////
+
// A list of strings that can be considered unlikely candidates when
// extracting content from a resource. These strings are joined together
// and then tested for existence using re:test, so may contain simple,
// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST$1=['ad-break','adbox','advert','addthis','agegate','aux','blogger-labels','combx','comment','conversation','disqus','entry-unrelated','extra','foot','form','header','hidden','loader','login',// Note: This can hit 'blogindex'.
-'menu','meta','nav','pager','pagination','predicta',// readwriteweb inline ad box
-'presence_control_external',// lifehacker.com container full of false positives
-'popup','printfriendly','related','remove','remark','rss','share','shoutbox','sidebar','sociable','sponsor','tools'];// A list of strings that can be considered LIKELY candidates when
+var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
+
+// A list of strings that can be considered LIKELY candidates when
// extracting content from a resource. Essentially, the inverse of the
// blacklist above - if something matches both blacklist and whitelist,
// it is kept. This is useful, for example, if something has a className
@@ -381,101 +379,329 @@ var UNLIKELY_CANDIDATES_BLACKLIST$1=['ad-break','adbox','advert','addthis','ageg
// These strings are joined together and then tested for existence using
// re:test, so may contain simple, non-pipe style regular expression queries
// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST$1=['and','article','body','blogindex','column','content','entry-content-asset','format',// misuse of form
-'hfeed','hentry','hatom','main','page','posts','shadow'];// A list of tags which, if found inside, should cause a to NOT
+var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+
+// A list of tags which, if found inside, should cause a <div /> to NOT
// be turned into a paragraph tag. Shallow div tags without these elements
// should be turned into <p /> tags.
-var DIV_TO_P_BLOCK_TAGS$1=['a','blockquote','dl','div','img','p','pre','table'].join(',');// A list of tags that should be ignored when trying to find the top candidate
+var DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+
+// A list of tags that should be ignored when trying to find the top candidate
// for a document.
-var NON_TOP_CANDIDATE_TAGS$1=['br','b','i','label','hr','area','base','basefont','input','img','link','meta'];var NON_TOP_CANDIDATE_TAGS_RE$1=new RegExp('^('+NON_TOP_CANDIDATE_TAGS$1.join('|')+')$','i');// A list of selectors that specify, very clearly, either hNews or other
+var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
+
+var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');
+
+// A list of selectors that specify, very clearly, either hNews or other
// very content-specific style content, like Blogger templates.
// More examples here: http://microformats.org/wiki/blog-post-formats
-var HNEWS_CONTENT_SELECTORS$1=[['.hentry','.entry-content'],['entry','.entry-content'],['.entry','.entry_content'],['.post','.postbody'],['.post','.post_body'],['.post','.post-body']];var PHOTO_HINTS$1=['figure','photo','image','caption'];var PHOTO_HINTS_RE$1=new RegExp(PHOTO_HINTS$1.join('|'),'i');// A list of strings that denote a positive scoring for this content as being
+var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
+
+// Substrings hinting at photo/figure markup. getWeight adds 10 to a node's
+// weight when its class attribute matches the joined pattern, which is
+// case-insensitive and unanchored (a substring match is enough).
+var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
+var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');
+
+// A list of strings that denote a positive scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS$1=['article','articlecontent','instapaper_body','blog','body','content','entry-content-asset','entry','hentry','main','Normal','page','pagination','permalink','post','story','text','[-_]copy',// usatoday
-'\\Bcopy'];// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE$1=new RegExp(POSITIVE_SCORE_HINTS$1.join('|'),'i');// Readability publisher-specific guidelines
-var READABILITY_ASSET$1=new RegExp('entry-content-asset','i');// A list of strings that denote a negative scoring for this content as being
+var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
+'\\Bcopy'];
+
+// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');
+
+// Readability publisher-specific guidelines
+var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');
+
+// A list of strings that denote a negative scoring for this content as being
// an article container. Checked against className and id.
//
// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS$1=['adbox','advert','author','bio','bookmark','bottom','byline','clear','com-','combx','comment','comment\\B','contact','copy','credit','crumb','date','deck','excerpt','featured',// tnr.com has a featured_content which throws us off
-'foot','footer','footnote','graf','head','info','infotext',// newscientist.com copyright
-'instapaper_ignore','jump','linebreak','link','masthead','media','meta','modal','outbrain',// slate.com junk
-'promo','pr_',// autoblog - press release
-'related','respond','roundcontent',// lifehacker restricted content warning
-'scroll','secondary','share','shopping','shoutbox','side','sidebar','sponsor','stamp','sub','summary','tags','tools','widget'];// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE$1=new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'),'i');// Match a digit. Pretty clear.
+var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
+'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
+'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
+'promo', 'pr_', // autoblog - press release
+'related', 'respond', 'roundcontent', // lifehacker restricted content warning
+'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
+// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');
+
+// Match a digit. Pretty clear.
+
+
// Match 2 or more consecutive <br> tags
+
+
// Match 1 BR tag.
+
+
// A list of all of the block level tags known in HTML5 and below. Taken from
// http://bit.ly/qneNIT
+
+
+
// The removal is implemented as a blacklist and whitelist, this test finds
// blacklisted elements that aren't whitelisted. We do this all in one
// expression-both because it's only one pass, and because this skips the
// serialization for whitelisted nodes.
-var candidatesBlacklist$1=UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');var candidatesWhitelist$1=UNLIKELY_CANDIDATES_WHITELIST$1.join('|');var PARAGRAPH_SCORE_TAGS$1=new RegExp('^(p|li|span|pre)$','i');var CHILD_CONTENT_TAGS$1=new RegExp('^(td|blockquote|ol|ul|dl)$','i');var BAD_TAGS$1=new RegExp('^(address|form)$','i');// Get the score of a node based on its className and id.
-function getWeight(node){var classes=node.attr('class');var id=node.attr('id');var score=0;if(id){// if id exists, try to score on both positive and negative
-if(POSITIVE_SCORE_RE$1.test(id)){score+=25;}if(NEGATIVE_SCORE_RE$1.test(id)){score-=25;}}if(classes){if(score===0){// if classes exist and id did not contribute to score
-// try to score on both positive and negative
-if(POSITIVE_SCORE_RE$1.test(classes)){score+=25;}if(NEGATIVE_SCORE_RE$1.test(classes)){score-=25;}}// even if score has been set by id, add score for
-// possible photo matches
-// "try to keep photos if we can"
-if(PHOTO_HINTS_RE$1.test(classes)){score+=10;}// add 25 if class matches entry-content-asset,
-// a class apparently instructed for use in the
-// Readability publisher guidelines
-// https://www.readability.com/developers/guidelines
-if(READABILITY_ASSET$1.test(classes)){score+=25;}}return score;}// returns the score of a node based on
-// the node's score attribute
-// returns null if no score set
-function getScore($node){return parseFloat($node.attr('score'))||null;}// return 1 for every comma in text
-function scoreCommas(text){return(text.match(/,/g)||[]).length;}var idkRe=new RegExp('^(p|pre)$','i');function scoreLength(textLength){var tagName=arguments.length>1&&arguments[1]!==undefined?arguments[1]:'p';var chunks=textLength/50;if(chunks>0){var lengthBonus=void 0;// No idea why p or pre are being tamped down here
-// but just following the source for now
-// Not even sure why tagName is included here,
-// since this is only being called from the context
-// of scoreParagraph
-if(idkRe.test(tagName)){lengthBonus=chunks-2;}else{lengthBonus=chunks-1.25;}return Math.min(Math.max(lengthBonus,0),3);}return 0;}// Score a paragraph using various methods. Things like number of
-// commas, etc. Higher is better.
-function scoreParagraph$$1(node){var score=1;var text=node.text().trim();var textLength=text.length;// If this paragraph is less than 25 characters, don't count it.
-if(textLength<25){return 0;}// Add points for any commas within this paragraph
-score+=scoreCommas(text);// For every 50 characters in this paragraph, add another point. Up
-// to 3 points.
-score+=scoreLength(textLength);// Articles can end with short paragraphs when people are being clever
-// but they can also end with short paragraphs setting up lists of junk
-// that we strip. This negative tweaks junk setup paragraphs just below
-// the cutoff threshold.
-if(text.slice(-1)===':'){score-=1;}return score;}function setScore($node,$,score){$node.attr('score',score);return $node;}function addScore$$1($node,$,amount){try{var score=getOrInitScore$$1($node,$)+amount;setScore($node,$,score);}catch(e){// Ignoring; error occurs in scoreNode
-}return $node;}// Adds 1/4 of a child's score to its parent
-function addToParent$$1(node,$,score){var parent=node.parent();if(parent){addScore$$1(parent,$,score*0.25);}return node;}// gets and returns the score if it exists
+var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
+
+
+var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
+
+
+
+
+// Anchored, case-insensitive matchers for whole tag names. The code that
+// consults them lies outside this part of the diff.
+var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
+var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
+var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
+
+// Get the score of a node based on its className and id.
+// Weigh a wrapped node by its id and class attributes.
+//   id:    +25 on a positive hint, -25 on a negative hint
+//   class: the same +/-25, but only if the id left the weight at 0
+//   class: +10 on a photo hint, +25 on the Readability
+//          'entry-content-asset' class, whatever the weight so far
+function getWeight(node) {
+  var classAttr = node.attr('class');
+  var idAttr = node.attr('id');
+  var weight = 0;
+
+  if (idAttr) {
+    weight += POSITIVE_SCORE_RE$1.test(idAttr) ? 25 : 0;
+    weight -= NEGATIVE_SCORE_RE$1.test(idAttr) ? 25 : 0;
+  }
+
+  if (!classAttr) {
+    return weight;
+  }
+
+  if (weight === 0) {
+    // The id contributed nothing (or cancelled out), so let the class
+    // names vote on positive and negative hints.
+    weight += POSITIVE_SCORE_RE$1.test(classAttr) ? 25 : 0;
+    weight -= NEGATIVE_SCORE_RE$1.test(classAttr) ? 25 : 0;
+  }
+
+  // "try to keep photos if we can"
+  weight += PHOTO_HINTS_RE$1.test(classAttr) ? 10 : 0;
+
+  // Class recommended by the Readability publisher guidelines:
+  // https://www.readability.com/developers/guidelines
+  weight += READABILITY_ASSET$1.test(classAttr) ? 25 : 0;
+
+  return weight;
+}
+
+// returns the score of a node based on
+// the node's score attribute
+// returns null if no score set
+function getScore($node) {
+  // Any falsy parse result (NaN, and also 0) is reported as null.
+  var parsed = parseFloat($node.attr('score'));
+  return parsed ? parsed : null;
+}
+
+// Count the commas in `text`; each comma is worth one point.
+function scoreCommas(text) {
+  return text.split(',').length - 1;
+}
+
+// Tags (p, pre) whose length bonus is offset by 2 chunks rather than 1.25.
+var idkRe = new RegExp('^(p|pre)$', 'i');
+
+// Length bonus for a block of text: one "chunk" per 50 characters, minus a
+// tag-dependent offset, clamped to the range [0, 3].
+//   textLength - character count of the text.
+//   tagName    - optional second argument, defaults to 'p'.
+// NOTE(review): the original comments admit it is unclear why p/pre are
+// tamped down harder, or why tagName exists at all, given that
+// scoreParagraph$$1 never passes it. Kept to follow the upstream source.
+function scoreLength(textLength) {
+  var tagName = arguments[1] === undefined ? 'p' : arguments[1];
+  var chunks = textLength / 50;
+
+  // Zero, negative or NaN lengths earn no bonus.
+  if (!(chunks > 0)) {
+    return 0;
+  }
+
+  // p/pre lose 2 chunks, every other tag 1.25.
+  var offset = idkRe.test(tagName) ? 2 : 1.25;
+  var lengthBonus = chunks - offset;
+
+  // Clamp: never negative, never more than 3 points.
+  return Math.min(Math.max(lengthBonus, 0), 3);
+}
+
+// Score a paragraph-like node from its trimmed text. Higher is better.
+// Text shorter than 25 characters scores 0. Otherwise the score is:
+//   1 base point
+//   + 1 per comma (scoreCommas)
+//   + the length bonus, up to 3 points (scoreLength)
+//   - 1 if the text ends with a colon
+function scoreParagraph$$1(node) {
+  var text = node.text().trim();
+  var textLength = text.length;
+
+  // If this paragraph is less than 25 characters, don't count it.
+  if (textLength < 25) {
+    return 0;
+  }
+
+  // One point for each comma within this paragraph.
+  var commaPoints = scoreCommas(text);
+
+  // A length bonus (see scoreLength), capped at 3 points.
+  var lengthPoints = scoreLength(textLength);
+
+  // Articles can end with short paragraphs when people are being clever,
+  // but they can also end with short paragraphs setting up lists of junk
+  // that we strip. The colon penalty nudges those set-up paragraphs just
+  // below the cutoff threshold.
+  var colonPenalty = text.slice(-1) === ':' ? 1 : 0;
+
+  return 1 + commaPoints + lengthPoints - colonPenalty;
+}
+// Store `score` in the node's `score` attribute and return the node; `$` is unused.
+function setScore($node, $, score) {
+ $node.attr('score', score);
+ return $node;
+}
+// Add `amount` to the node's score (initializing it when absent), then
+// return the node. Best-effort: errors thrown while scoring are swallowed.
+function addScore$$1($node, $, amount) {
+  try {
+    var current = getOrInitScore$$1($node, $);
+    setScore($node, $, current + amount);
+  } catch (e) {
+    // Ignoring; error occurs in scoreNode
+  }
+  return $node;
+}
+
+// Credit a quarter of `score` to the node's parent; returns `node` itself.
+function addToParent$$1(node, $, score) {
+  var $parent = node.parent();
+  var share = score * 0.25;
+  if ($parent) {
+    addScore$$1($parent, $, share);
+  }
+  return node;
+}
+
+// gets and returns the score if it exists
// if not, initializes a score based on
// the node's tag type
-function getOrInitScore$$1($node,$){var weightNodes=arguments.length>2&&arguments[2]!==undefined?arguments[2]:true;var score=getScore($node);if(score){return score;}score=scoreNode$$1($node);if(weightNodes){score+=getWeight($node);}addToParent$$1($node,$,score);return score;}// Score an individual node. Has some smarts for paragraphs, otherwise
+function getOrInitScore$$1($node, $) {
+  // Optional third argument: apply the class/id weight? Defaults to true.
+  var weightNodes = arguments[2] === undefined ? true : arguments[2];
+
+  // Reuse a score already on the node (a falsy score, e.g. 0, counts as none).
+  var existing = getScore($node);
+  if (existing) {
+    return existing;
+  }
+
+  // Nothing usable yet: derive the score from the tag, optionally adjusted
+  // by getWeight, and credit the parent. The score is returned, not stored.
+  var tagScore = scoreNode$$1($node);
+  var score = weightNodes ? tagScore + getWeight($node) : tagScore;
+
+  addToParent$$1($node, $, score);
+
+  return score;
+}
+
+// Score an individual node. Has some smarts for paragraphs, otherwise
// just scores based on tag.
-function scoreNode$$1($node){var _$node$get=$node.get(0),tagName=_$node$get.tagName;// TODO: Consider ordering by most likely.
-// E.g., if divs are a more common tag on a page,
-// Could save doing that regex test on every node – AP
-if(PARAGRAPH_SCORE_TAGS$1.test(tagName)){return scoreParagraph$$1($node);}else if(tagName==='div'){return 5;}else if(CHILD_CONTENT_TAGS$1.test(tagName)){return 3;}else if(BAD_TAGS$1.test(tagName)){return-3;}else if(tagName==='th'){return-5;}return 0;}function convertSpans$1($node,$){if($node.get(0)){var _$node$get=$node.get(0),tagName=_$node$get.tagName;if(tagName==='span'){// convert spans to divs
-convertNodeTo($node,$,'div');}}}function addScoreTo($node,$,score){if($node){convertSpans$1($node,$);addScore$$1($node,$,score);}}function scorePs($,weightNodes){$('p, pre').not('[score]').each(function(index,node){// The raw score for this paragraph, before we add any parent/child
-// scores.
-var $node=$(node);$node=setScore($node,$,getOrInitScore$$1($node,$,weightNodes));var $parent=$node.parent();var rawScore=scoreNode$$1($node);addScoreTo($parent,$,rawScore,weightNodes);if($parent){// Add half of the individual content score to the
-// grandparent
-addScoreTo($parent.parent(),$,rawScore/2,weightNodes);}});return $;}// score content. Parents get the full value of their children's
-// content score, grandparents half
-function scoreContent$$1($){var weightNodes=arguments.length>1&&arguments[1]!==undefined?arguments[1]:true;// First, look for special hNews based selectors and give them a big
-// boost, if they exist
-HNEWS_CONTENT_SELECTORS$1.forEach(function(_ref){var _ref2=_slicedToArray$1(_ref,2),parentSelector=_ref2[0],childSelector=_ref2[1];$(parentSelector+' '+childSelector).each(function(index,node){addScore$$1($(node).parent(parentSelector),$,80);});});// Doubling this again
-// Previous solution caused a bug
-// in which parents weren't retaining
-// scores. This is not ideal, and
-// should be fixed.
-scorePs($,weightNodes);scorePs($,weightNodes);return $;}var NORMALIZE_RE=/\s{2,}/g;function normalizeSpaces(text){return text.replace(NORMALIZE_RE,' ').trim();}// Given a node type to search for, and a list of regular expressions,
+function scoreNode$$1($node) {
+  var tagName = $node.get(0).tagName;
+
+  // TODO: Consider ordering by most likely.
+  // E.g., if divs are a more common tag on a page,
+  // Could save doing that regex test on every node – AP
+
+  // Paragraph-like tags are scored from their text.
+  if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
+    return scoreParagraph$$1($node);
+  }
+
+  // Literal comparisons use the lower-cased name; the regexes receive
+  // the raw tagName.
+  var tag = tagName.toLowerCase();
+
+  if (tag === 'div') { return 5; }
+  if (CHILD_CONTENT_TAGS$1.test(tagName)) { return 3; }
+  if (BAD_TAGS$1.test(tagName)) { return -3; }
+  if (tag === 'th') { return -5; }
+
+  return 0;
+}
+
+// Convert a span node to a div; other tags and empty selections are left
+// alone. The tag name is compared case-insensitively because browser DOMs
+// report HTML tag names in upper case (cf. scoreNode$$1's toLowerCase).
+function convertSpans$1($node, $) {
+  var element = $node.get(0);
+  var tagName = element && element.tagName;
+
+  if (typeof tagName === 'string' && tagName.toLowerCase() === 'span') {
+    convertNodeTo$$1($node, $, 'div');
+  }
+}
+
+// Add `score` to a node, first turning it into a div if it is a span.
+function addScoreTo($node, $, score) {
+  if (!$node) { return; }
+  convertSpans$1($node, $);
+  addScore$$1($node, $, score);
+}
+
+// Score each p/pre that has no `score` attribute, then pass its raw
+// tag score up: in full to the parent, halved to the grandparent.
+function scorePs($, weightNodes) {
+  var $unscored = $('p, pre').not('[score]');
+
+  $unscored.each(function (index, element) {
+    var $paragraph = $(element);
+    var ownScore = getOrInitScore$$1($paragraph, $, weightNodes);
+    setScore($paragraph, $, ownScore);
+
+    // Raw score of this paragraph, before any parent/child scores.
+    var $parent = $paragraph.parent();
+    var rawScore = scoreNode$$1($paragraph);
+    addScoreTo($parent, $, rawScore);
+    if ($parent) {
+      addScoreTo($parent.parent(), $, rawScore / 2);
+    }
+  });
+  return $;
+}
+// normalizeSpaces: turn each run of 2+ whitespace chars into one space, then trim.
+var NORMALIZE_RE = /\s{2,}/g;
+function normalizeSpaces(text) {
+  var collapsed = text.replace(NORMALIZE_RE, ' ');
+  return collapsed.trim();
+}
+
+// Given a node type to search for, and a list of regular expressions,
// look to see if this extraction can be found in the URL. Expects
// that each expression in r_list will return group(1) as the proper
// string to be cleaned.
// Only used for date_published currently.
-function extractFromUrl(url,regexList){var matchRe=regexList.find(function(re){return re.test(url);});if(matchRe){return matchRe.exec(url)[1];}return null;}// An expression that looks to try to find the page digit within a URL, if
+
+// An expression that looks to try to find the page digit within a URL, if
// it exists.
// Matches:
// page=1
@@ -491,1579 +717,1434 @@ function extractFromUrl(url,regexList){var matchRe=regexList.find(function(re){r
// Does not match:
// pg=102
// page:2
-var PAGE_IN_HREF_RE=new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})','i');var HAS_ALPHA_RE=/[a-z]/i;var IS_ALPHA_RE=/^[a-z]+$/i;var IS_DIGIT_RE=/^[0-9]+$/i;function pageNumFromUrl(url){var matches=url.match(PAGE_IN_HREF_RE);if(!matches)return null;var pageNum=parseInt(matches[6],10);// Return pageNum < 100, otherwise
-// return null
-return pageNum<100?pageNum:null;}function removeAnchor(url){return url.split('#')[0].replace(/\/$/,'');}function isGoodSegment(segment,index,firstSegmentHasLetters){var goodSegment=true;// If this is purely a number, and it's the first or second
-// url_segment, it's probably a page number. Remove it.
-if(index<2&&IS_DIGIT_RE.test(segment)&&segment.length<3){goodSegment=true;}// If this is the first url_segment and it's just "index",
-// remove it
-if(index===0&&segment.toLowerCase()==='index'){goodSegment=false;}// If our first or second url_segment is smaller than 3 characters,
-// and the first url_segment had no alphas, remove it.
-if(index<2&&segment.length<3&&!firstSegmentHasLetters){goodSegment=false;}return goodSegment;}// Take a URL, and return the article base of said URL. That is, no
-// pagination data exists in it. Useful for comparing to other links
-// that might have pagination data within them.
-function articleBaseUrl(url,parsed){var parsedUrl=parsed||URL$1.parse(url);var protocol=parsedUrl.protocol,host=parsedUrl.host,path=parsedUrl.path;var firstSegmentHasLetters=false;var cleanedSegments=path.split('/').reverse().reduce(function(acc,rawSegment,index){var segment=rawSegment;// Split off and save anything that looks like a file type.
-if(segment.includes('.')){var _segment$split=segment.split('.'),_segment$split2=_slicedToArray$1(_segment$split,2),possibleSegment=_segment$split2[0],fileExt=_segment$split2[1];if(IS_ALPHA_RE.test(fileExt)){segment=possibleSegment;}}// If our first or second segment has anything looking like a page
-// number, remove it.
-if(PAGE_IN_HREF_RE.test(segment)&&index<2){segment=segment.replace(PAGE_IN_HREF_RE,'');}// If we're on the first segment, check to see if we have any
-// characters in it. The first segment is actually the last bit of
-// the URL, and this will be helpful to determine if we're on a URL
-// segment that looks like "/2/" for example.
-if(index===0){firstSegmentHasLetters=HAS_ALPHA_RE.test(segment);}// If it's not marked for deletion, push it to cleaned_segments.
-if(isGoodSegment(segment,index,firstSegmentHasLetters)){acc.push(segment);}return acc;},[]);return protocol+'//'+host+cleanedSegments.reverse().join('/');}// Given a string, return True if it appears to have an ending sentence
-// within it, false otherwise.
-var SENTENCE_END_RE=new RegExp('.( |$)');function hasSentenceEnd(text){return SENTENCE_END_RE.test(text);}function excerptContent(content){var words=arguments.length>1&&arguments[1]!==undefined?arguments[1]:10;return content.trim().split(/\s+/).slice(0,words).join(' ');}// Now that we have a top_candidate, look through the siblings of
-// it to see if any of them are decently scored. If they are, they
-// may be split parts of the content (Like two divs, a preamble and
-// a body.) Example:
-// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
-function mergeSiblings($candidate,topScore,$){if(!$candidate.parent().length){return $candidate;}var siblingScoreThreshold=Math.max(10,topScore*0.25);var wrappingDiv=$('');$candidate.parent().children().each(function(index,sibling){var $sibling=$(sibling);// Ignore tags like BR, HR, etc
-if(NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)){return null;}var siblingScore=getScore($sibling);if(siblingScore){if($sibling===$candidate){wrappingDiv.append($sibling);}else{var contentBonus=0;var density=linkDensity($sibling);// If sibling has a very low link density,
-// give it a small bonus
-if(density<0.05){contentBonus+=20;}// If sibling has a high link density,
-// give it a penalty
-if(density>=0.5){contentBonus-=20;}// If sibling node has the same class as
-// candidate, give it a bonus
-if($sibling.attr('class')===$candidate.attr('class')){contentBonus+=topScore*0.2;}var newScore=siblingScore+contentBonus;if(newScore>=siblingScoreThreshold){return wrappingDiv.append($sibling);}else if(sibling.tagName==='p'){var siblingContent=$sibling.text();var siblingContentLength=textLength(siblingContent);if(siblingContentLength>80&&density<0.25){return wrappingDiv.append($sibling);}else if(siblingContentLength<=80&&density===0&&hasSentenceEnd(siblingContent)){return wrappingDiv.append($sibling);}}}}return null;});return wrappingDiv;}// After we've calculated scores, loop through all of the possible
-// candidate nodes we found and find the one with the highest score.
-function findTopCandidate$$1($){var $candidate=void 0;var topScore=0;$('[score]').each(function(index,node){// Ignore tags like BR, HR, etc
-if(NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)){return;}var $node=$(node);var score=getScore($node);if(score>topScore){topScore=score;$candidate=$node;}});// If we don't have a candidate, return the body
-// or whatever the first element is
-if(!$candidate){return $('body')||$('*').first();}$candidate=mergeSiblings($candidate,topScore,$);return $candidate;}// Scoring
-function removeUnlessContent($node,$,weight){// Explicitly save entry-content-asset tags, which are
-// noted as valuable in the Publisher guidelines. For now
-// this works everywhere. We may want to consider making
-// this less of a sure-thing later.
-if($node.hasClass('entry-content-asset')){return;}var content=normalizeSpaces($node.text());if(scoreCommas(content)<10){var pCount=$('p',$node).length;var inputCount=$('input',$node).length;// Looks like a form, too many inputs.
-if(inputCount>pCount/3){$node.remove();return;}var contentLength=content.length;var imgCount=$('img',$node).length;// Content is too short, and there are no images, so
-// this is probably junk content.
-if(contentLength<25&&imgCount===0){$node.remove();return;}var density=linkDensity($node);// Too high of link density, is probably a menu or
-// something similar.
-// console.log(weight, density, contentLength)
-if(weight<25&&density>0.2&&contentLength>75){$node.remove();return;}// Too high of a link density, despite the score being
-// high.
-if(weight>=25&&density>0.5){// Don't remove the node if it's a list and the
-// previous sibling starts with a colon though. That
-// means it's probably content.
-var tagName=$node.get(0).tagName;var nodeIsList=tagName==='ol'||tagName==='ul';if(nodeIsList){var previousNode=$node.prev();if(previousNode&&normalizeSpaces(previousNode.text()).slice(-1)===':'){return;}}$node.remove();return;}var scriptCount=$('script',$node).length;// Too many script tags, not enough content.
-if(scriptCount>0&&contentLength<150){$node.remove();return;}}}// Given an article, clean it of some superfluous content specified by
-// tags. Things like forms, ads, etc.
-//
-// Tags is an array of tag name's to search through. (like div, form,
-// etc)
-//
-// Return this same doc.
-function cleanTags$$1($article,$){$(CLEAN_CONDITIONALLY_TAGS,$article).each(function(index,node){var $node=$(node);var weight=getScore($node);if(!weight){weight=getOrInitScore$$1($node,$);setScore($node,$,weight);}// drop node if its weight is < 0
-if(weight<0){$node.remove();}else{// deteremine if node seems like content
-removeUnlessContent($node,$,weight);}});return $;}function cleanHeaders($article,$){var title=arguments.length>2&&arguments[2]!==undefined?arguments[2]:'';$(HEADER_TAG_LIST,$article).each(function(index,header){var $header=$(header);// Remove any headers that appear before all other p tags in the
-// document. This probably means that it was part of the title, a
-// subtitle or something else extraneous like a datestamp or byline,
-// all of which should be handled by other metadata handling.
-if($($header,$article).prevAll('p').length===0){return $header.remove();}// Remove any headers that match the title exactly.
-if(normalizeSpaces($(header).text())===title){return $header.remove();}// If this header has a negative weight, it's probably junk.
-// Get rid of it.
-if(getWeight($(header))<0){return $header.remove();}return $header;});return $;}// Rewrite the tag name to div if it's a top level node like body or
-// html to avoid later complications with multiple body tags.
-function rewriteTopLevel$$1(article,$){// I'm not using context here because
-// it's problematic when converting the
-// top-level/root node - AP
-$=convertNodeTo($('html'),$,'div');$=convertNodeTo($('body'),$,'div');return $;}function absolutize($,rootUrl,attr,$content){$('['+attr+']',$content).each(function(_,node){var url=node.attribs[attr];var absoluteUrl=URL$1.resolve(rootUrl,url);node.attribs[attr]=absoluteUrl;});}function makeLinksAbsolute($content,$,url){['href','src'].forEach(function(attr){return absolutize($,url,attr,$content);});return $content;}function textLength(text){return text.trim().replace(/\s+/g,' ').length;}// Determines what percentage of the text
+var PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
+// True when the string contains at least one ASCII letter.
+var HAS_ALPHA_RE = /[a-z]/i;
+// Whole-string tests: only ASCII letters / only decimal digits.
+var IS_ALPHA_RE = /^[a-z]+$/i;
+var IS_DIGIT_RE = /^[0-9]+$/i;
+
+// Decide whether a URL path segment should be kept (true) or dropped
+// (false). NOTE(review): `index` and `firstSegmentHasLetters` are presumably
+// supplied by articleBaseUrl; the caller is not visible here, so verify.
+function isGoodSegment(segment, index, firstSegmentHasLetters) {
+  // NOTE(review): a branch here used to claim it removed short, purely
+  // numeric segments at index 0/1, but it assigned `true` to a flag that
+  // was already true and never removed anything. The no-op is gone; the
+  // shipped behaviour (such segments survive unless a rule below rejects
+  // them) is unchanged.
+
+  // If this is the first url_segment and it's just "index", remove it.
+  if (index === 0 && segment.toLowerCase() === 'index') {
+    return false;
+  }
+
+  // If our first or second url_segment is smaller than 3 characters,
+  // and the first url_segment had no alphas, remove it.
+  if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
+    return false;
+  }
+
+  return true;
+}
+
+// Return true if the text appears to contain a sentence end: a literal
+// period (escaped -- a bare "." matched any character) before a space or EOS.
+var SENTENCE_END_RE = /\.( |$)/;
+function hasSentenceEnd(text) {
+  return SENTENCE_END_RE.test(text);
+}
+
+// Now that we have a top candidate, look through its siblings to see if
+// any of them are decently scored. If they are, they may be split parts
+// of the content (like two divs, a preamble and a body). Example:
+// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
+// Returns $candidate when it has no parent or ends up as the wrapper's
+// only child; otherwise a new div holding every node that qualified.
+function mergeSiblings($candidate, topScore, $) {
+  if (!$candidate.parent().length) {
+    return $candidate;
+  }
+
+  var siblingScoreThreshold = Math.max(10, topScore * 0.25);
+  // Fresh container element that collects the merged content.
+  var wrappingDiv = $('<div></div>');
+  var candidateEl = $candidate.get(0);
+
+  $candidate.parent().children().each(function (index, sibling) {
+    var $sibling = $(sibling);
+    // Ignore tags like BR, HR, etc
+    if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
+      return null;
+    }
+    // Siblings without a (non-zero) score are never merged.
+    var siblingScore = getScore($sibling);
+    if (!siblingScore) {
+      return null;
+    }
+
+    // The candidate itself is always kept.
+    if ($sibling.get(0) === candidateEl) {
+      wrappingDiv.append($sibling);
+      return null;
+    }
+
+    // Link density: +20 when very low, -20 when high.
+    var density = linkDensity($sibling);
+    var contentBonus = 0;
+    if (density < 0.05) {
+      contentBonus = 20;
+    } else if (density >= 0.5) {
+      contentBonus = -20;
+    }
+    // Sharing the candidate's class attribute earns a bonus too.
+    if ($sibling.attr('class') === $candidate.attr('class')) {
+      contentBonus += topScore * 0.2;
+    }
+
+    if (siblingScore + contentBonus >= siblingScoreThreshold) {
+      return wrappingDiv.append($sibling);
+    }
+
+    // Low-scoring paragraphs can still qualify on their text alone. The
+    // tag name is lower-cased first: browser DOMs report it in upper case.
+    if ((sibling.tagName || '').toLowerCase() === 'p') {
+      var siblingContent = $sibling.text();
+      var siblingContentLength = textLength(siblingContent);
+      if (siblingContentLength > 80 && density < 0.25) {
+        return wrappingDiv.append($sibling);
+      } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
+        return wrappingDiv.append($sibling);
+      }
+    }
+    return null;
+  });
+
+  // Only the candidate made it in: hand back the original node.
+  if (wrappingDiv.children().length === 1 && wrappingDiv.children().first().get(0) === candidateEl) {
+    return $candidate;
+  }
+
+  return wrappingDiv;
+}
+
+// Scoring
+
+function removeUnlessContent($node, $, weight) {
+  // Remove $node from the document unless it looks like real content.
+  // `weight` is the node's score; nothing is returned.
+
+  // Explicitly save entry-content-asset tags, which are
+  // noted as valuable in the Publisher guidelines. For now
+  // this works everywhere. We may want to consider making
+  // this less of a sure-thing later.
+  if ($node.hasClass('entry-content-asset')) {
+    return;
+  }
+
+  var content = normalizeSpaces($node.text());
+
+  // Text with 10 or more commas is taken to be content: keep the node.
+  if (!(scoreCommas(content) < 10)) {
+    return;
+  }
+
+  var pCount = $('p', $node).length;
+  var inputCount = $('input', $node).length;
+
+  // Looks like a form, too many inputs.
+  if (inputCount > pCount / 3) {
+    $node.remove();
+    return;
+  }
+
+  var contentLength = content.length;
+  var imgCount = $('img', $node).length;
+
+  // Content is too short, and there are no images, so
+  // this is probably junk content.
+  if (contentLength < 25 && imgCount === 0) {
+    $node.remove();
+    return;
+  }
+
+  var density = linkDensity($node);
+
+  // Too high of link density, is probably a menu or
+  // something similar.
+  if (weight < 25 && density > 0.2 && contentLength > 75) {
+    $node.remove();
+    return;
+  }
+
+  // Too high of a link density, despite the score being high.
+  if (weight >= 25 && density > 0.5) {
+    // Don't remove the node if it's a list and the text of its
+    // previous sibling ends with a colon though. That means it's
+    // probably content.
+    var tagName = $node.get(0).tagName.toLowerCase();
+    if (tagName === 'ol' || tagName === 'ul') {
+      var previousNode = $node.prev();
+      if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
+        return;
+      }
+    }
+
+    $node.remove();
+    return;
+  }
+
+  // Too many script tags, not enough content.
+  var scriptCount = $('script', $node).length;
+  if (scriptCount > 0 && contentLength < 150) {
+    $node.remove();
+  }
+}
+
+/* eslint-disable */
+// Resolve `attr` against rootUrl on each element under $content that has it.
+function absolutize($, rootUrl, attr, $content) {
+  var selector = '[' + attr + ']';
+  $(selector, $content).each(function (_, node) {
+    var url = getAttrs(node)[attr];
+    // Attribute present but empty: nothing to resolve.
+    if (url) {
+      setAttr(node, attr, URL.resolve(rootUrl, url));
+    }
+  });
+}
+
+// Make every href and src under $content absolute, resolved against `url`,
+// and return $content.
+function makeLinksAbsolute$$1($content, $, url) {
+  absolutize($, url, 'href', $content);
+  absolutize($, url, 'src', $content);
+  return $content;
+}
+// Length of `text` once trimmed and with whitespace runs collapsed to one space.
+function textLength(text) {
+  return text.replace(/\s+/g, ' ').trim().length;
+}
+
+// Determines what percentage of the text
// in a node is link text
// Takes a node, returns a float
-function linkDensity($node){var totalTextLength=textLength($node.text());var linkText=$node.find('a').text();var linkLength=textLength(linkText);if(totalTextLength>0){return linkLength/totalTextLength;}else if(totalTextLength===0&&linkLength>0){return 1;}return 0;}// Given a node type to search for, and a list of meta tag names to
-// search for, find a meta tag associated.
-function extractFromMeta$$1($,metaNames,cachedNames){var cleanTags$$1=arguments.length>3&&arguments[3]!==undefined?arguments[3]:true;var foundNames=metaNames.filter(function(name){return cachedNames.indexOf(name)!==-1;});var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{var _loop=function _loop(){var name=_step.value;var type='name';var value='value';var nodes=$('meta['+type+'="'+name+'"]');// Get the unique value of every matching node, in case there
-// are two meta tags with the same name and value.
-// Remove empty values.
-var values=nodes.map(function(index,node){return $(node).attr(value);}).toArray().filter(function(text){return text!=='';});// If we have more than one value for the same name, we have a
-// conflict and can't trust any of them. Skip this name. If we have
-// zero, that means our meta tags had no values. Skip this name
-// also.
-if(values.length===1){var metaValue=void 0;// Meta values that contain HTML should be stripped, as they
-// weren't subject to cleaning previously.
-if(cleanTags$$1){metaValue=stripTags(values[0],$);}else{metaValue=values[0];}return{v:metaValue};}};for(var _iterator=_getIterator$1(foundNames),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var _ret=_loop();if((typeof _ret==='undefined'?'undefined':_typeof$1(_ret))==="object")return _ret.v;}// If nothing is found, return null
-}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}function isGoodNode($node,maxChildren){// If it has a number of children, it's more likely a container
-// element. Skip it.
-if($node.children().length>maxChildren){return false;}// If it looks to be within a comment, skip it.
-if(withinComment($node)){return false;}return true;}// Given a a list of selectors find content that may
-// be extractable from the document. This is for flat
-// meta-information, like author, title, date published, etc.
-function extractFromSelectors$$1($,selectors){var maxChildren=arguments.length>2&&arguments[2]!==undefined?arguments[2]:1;var textOnly=arguments.length>3&&arguments[3]!==undefined?arguments[3]:true;var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(selectors),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var selector=_step.value;var nodes=$(selector);// If we didn't get exactly one of this selector, this may be
-// a list of articles or comments. Skip it.
-if(nodes.length===1){var $node=$(nodes[0]);if(isGoodNode($node,maxChildren)){var content=void 0;if(textOnly){content=$node.text();}else{content=$node.html();}if(content){return content;}}}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}// strips all tags from a string of text
-function stripTags(text,$){// Wrapping text in html element prevents errors when text
-// has no html
-var cleanText=$(''+text+'').text();return cleanText===''?text:cleanText;}function withinComment($node){var parents=$node.parents().toArray();var commentParent=parents.find(function(parent){var classAndId=parent.attribs.class+' '+parent.attribs.id;return classAndId.includes('comment');});return commentParent!==undefined;}// Given a node, determine if it's article-like enough to return
-// param: node (a cheerio node)
-// return: boolean
-function nodeIsSufficient($node){return $node.text().trim().length>=100;}function isWordpress($){return $(IS_WP_SELECTOR).length>0;}// DOM manipulation
-// CLEAN AUTHOR CONSTANTS
-var CLEAN_AUTHOR_RE=/^\s*(posted |written )?by\s*:?\s*(.*)/i;// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
-// CLEAN DEK CONSTANTS
-var TEXT_LINK_RE=new RegExp('http(s)?://','i');// An ordered list of meta tag names that denote likely article deks.
-// From most distinct to least distinct.
-//
-// NOTE: There are currently no meta tags that seem to provide the right
-// content consistenty enough. Two options were:
-// - og:description
-// - dc.description
-// However, these tags often have SEO-specific junk in them that's not
-// header-worthy like a dek is. Excerpt material at best.
-// An ordered list of Selectors to find likely article deks. From
-// most explicit to least explicit.
-//
-// Should be more restrictive than not, as a failed dek can be pretty
-// detrimental to the aesthetics of an article.
-// CLEAN DATE PUBLISHED CONSTANTS
-var MS_DATE_STRING=/^\d{13}$/i;var SEC_DATE_STRING=/^\d{10}$/i;var CLEAN_DATE_STRING_RE=/^\s*published\s*:?\s*(.*)/i;var TIME_MERIDIAN_SPACE_RE=/(.*\d)(am|pm)(.*)/i;var TIME_MERIDIAN_DOTS_RE=/\.m\./i;var months=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'];var allMonths=months.join('|');var timestamp1='[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';var timestamp2='[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';var SPLIT_DATE_STRING=new RegExp('('+timestamp1+')|('+timestamp2+')|([0-9]{1,4})|('+allMonths+')','ig');// CLEAN TITLE CONSTANTS
-// A regular expression that will match separating characters on a
-// title, that usually denote breadcrumbs or something similar.
-var TITLE_SPLITTERS_RE=/(: | - | \| )/g;var DOMAIN_ENDINGS_RE=new RegExp('.com$|.net$|.org$|.co.uk$','g');// Take an author string (like 'By David Smith ') and clean it to
-// just the name(s): 'David Smith'.
-function cleanAuthor(author){return author.replace(CLEAN_AUTHOR_RE,'$2').trim();}function clean$1(leadImageUrl){leadImageUrl=leadImageUrl.trim();if(validUrl$1.isWebUri(leadImageUrl)){return leadImageUrl;}return null;}// Take a dek HTML fragment, and return the cleaned version of it.
-// Return None if the dek wasn't good enough.
-function cleanDek(dek,_ref){var $=_ref.$,excerpt=_ref.excerpt;// Sanity check that we didn't get too short or long of a dek.
-if(dek.length>1000||dek.length<5)return null;// Check that dek isn't the same as excerpt
-if(excerpt&&excerptContent(excerpt,10)===excerptContent(dek,10))return null;var dekText=stripTags(dek,$);// Plain text links shouldn't exist in the dek. If we have some, it's
-// not a good dek - bail.
-if(TEXT_LINK_RE.test(dekText))return null;return dekText.trim();}// Is there a compelling reason to use moment here?
-// Mostly only being used for the isValid() method,
-// but could just check for 'Invalid Date' string.
-function cleanDateString(dateString){return(dateString.match(SPLIT_DATE_STRING)||[]).join(' ').replace(TIME_MERIDIAN_DOTS_RE,'m').replace(TIME_MERIDIAN_SPACE_RE,'$1 $2 $3').replace(CLEAN_DATE_STRING_RE,'$1').trim();}// Take a date published string, and hopefully return a date out of
-// it. Return none if we fail.
-function cleanDatePublished(dateString){// If string is in milliseconds or seconds, convert to int
-if(MS_DATE_STRING.test(dateString)||SEC_DATE_STRING.test(dateString)){dateString=parseInt(dateString,10);}var date=moment$1(new Date(dateString));if(!date.isValid()){dateString=cleanDateString(dateString);date=moment$1(new Date(dateString));}return date.isValid()?date.toISOString():null;}// Clean our article content, returning a new, cleaned node.
-function extractCleanNode(article,_ref){var $=_ref.$,_ref$cleanConditional=_ref.cleanConditionally,cleanConditionally=_ref$cleanConditional===undefined?true:_ref$cleanConditional,_ref$title=_ref.title,title=_ref$title===undefined?'':_ref$title,_ref$url=_ref.url,url=_ref$url===undefined?'':_ref$url,_ref$defaultCleaner=_ref.defaultCleaner,defaultCleaner=_ref$defaultCleaner===undefined?true:_ref$defaultCleaner;// Rewrite the tag name to div if it's a top level node like body or
-// html to avoid later complications with multiple body tags.
-rewriteTopLevel$$1(article,$);// Drop small images and spacer images
-// Only do this is defaultCleaner is set to true;
-// this can sometimes be too aggressive.
-if(defaultCleaner)cleanImages(article,$);// Mark elements to keep that would normally be removed.
-// E.g., stripJunkTags will remove iframes, so we're going to mark
-// YouTube/Vimeo videos as elements we want to keep.
-markToKeep(article,$,url);// Drop certain tags like , etc
-// This is -mostly- for cleanliness, not security.
-stripJunkTags(article,$);// H1 tags are typically the article title, which should be extracted
-// by the title extractor instead. If there's less than 3 of them (<3),
-// strip them. Otherwise, turn 'em into H2s.
-cleanHOnes$$1(article,$);// Clean headers
-cleanHeaders(article,$,title);// Make links absolute
-makeLinksAbsolute(article,$,url);// We used to clean UL's and OL's here, but it was leading to
-// too many in-article lists being removed. Consider a better
-// way to detect menus particularly and remove them.
-// Also optionally running, since it can be overly aggressive.
-if(defaultCleaner)cleanTags$$1(article,$,cleanConditionally);// Remove empty paragraph nodes
-removeEmpty(article,$);// Remove unnecessary attributes
-cleanAttributes(article,$);return article;}function cleanTitle$$1(title,_ref){var url=_ref.url,$=_ref.$;// If title has |, :, or - in it, see if
-// we can clean it up.
-if(TITLE_SPLITTERS_RE.test(title)){title=resolveSplitTitle(title,url);}// Final sanity check that we didn't get a crazy title.
-// if (title.length > 150 || title.length < 15) {
-if(title.length>150){// If we did, return h1 from the document if it exists
-var h1=$('h1');if(h1.length===1){title=h1.text();}}// strip any html tags in the title text
-return stripTags(title,$).trim();}function extractBreadcrumbTitle(splitTitle,text){// This must be a very breadcrumbed title, like:
-// The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
-// NYTimes - Blogs - Bits - The Best Gadgets on Earth
-if(splitTitle.length>=6){var _ret=function(){// Look to see if we can find a breadcrumb splitter that happens
-// more than once. If we can, we'll be able to better pull out
-// the title.
-var termCounts=splitTitle.reduce(function(acc,titleText){acc[titleText]=acc[titleText]?acc[titleText]+1:1;return acc;},{});var _Reflect$ownKeys$redu=_Reflect$ownKeys$1(termCounts).reduce(function(acc,key){if(acc[1]> "
-// will match, but nothing longer than that.
-if(termCount>=2&&maxTerm.length<=4){splitTitle=text.split(maxTerm);}var splitEnds=[splitTitle[0],splitTitle.slice(-1)];var longestEnd=splitEnds.reduce(function(acc,end){return acc.length>end.length?acc:end;},'');if(longestEnd.length>10){return{v:longestEnd};}return{v:text};}();if((typeof _ret==='undefined'?'undefined':_typeof$1(_ret))==="object")return _ret.v;}return null;}function cleanDomainFromTitle(splitTitle,url){// Search the ends of the title, looking for bits that fuzzy match
-// the URL too closely. If one is found, discard it and return the
-// rest.
+function linkDensity($node) {
+ var totalTextLength = textLength($node.text());
+
+ var linkText = $node.find('a').text();
+ var linkLength = textLength(linkText);
+
+ if (totalTextLength > 0) {
+ return linkLength / totalTextLength;
+ } else if (totalTextLength === 0 && linkLength > 0) {
+ return 1;
+ }
+
+ return 0;
+}
+
+function isGoodNode($node, maxChildren) {
+ // If it has a number of children, it's more likely a container
+ // element. Skip it.
+ if ($node.children().length > maxChildren) {
+ return false;
+ }
+ // If it looks to be within a comment, skip it.
+ if (withinComment$$1($node)) {
+ return false;
+ }
+
+ return true;
+}
+
+// strips all tags from a string of text
+function stripTags(text, $) {
+ // Wrapping text in html element prevents errors when text
+ // has no html
+ var cleanText = $('' + text + '').text();
+ return cleanText === '' ? text : cleanText;
+}
+
+function withinComment$$1($node) {
+ var parents = $node.parents().toArray();
+ var commentParent = parents.find(function (parent) {
+ var attrs = getAttrs(parent);
+ var nodeClass = attrs.class,
+ id = attrs.id;
+
+ var classAndId = nodeClass + ' ' + id;
+ return classAndId.includes('comment');
+ });
+
+ return commentParent !== undefined;
+}
+
+// Given a node, determine if it's article-like enough to return
+// param: node (a cheerio node)
+// return: boolean
+
+function getAttrs(node) {
+ var attribs = node.attribs,
+ attributes = node.attributes;
+
+
+ if (!attribs && attributes) {
+ var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
+ var attr = attributes[index];
+
+ if (!attr.name || !attr.value) return acc;
+
+ acc[attr.name] = attr.value;
+ return acc;
+ }, {});
+ return attrs;
+ }
+
+ return attribs;
+}
+
+function setAttr(node, attr, val) {
+ if (node.attribs) {
+ node.attribs[attr] = val;
+ } else if (node.attributes) {
+ node.setAttribute(attr, val);
+ }
+
+ return node;
+}
+
+/* eslint-disable */
+function setAttrs(node, attrs) {
+ if (node.attribs) {
+ node.attribs = attrs;
+ } else if (node.attributes) {
+ while (node.attributes.length > 0) {
+ node.removeAttribute(node.attributes[0].name);
+ }_Reflect$ownKeys(attrs).forEach(function (key) {
+ node.setAttribute(key, attrs[key]);
+ });
+ }
+
+ return node;
+}
+
+// DOM manipulation
+
+function _interopDefault$1(ex){return ex&&(typeof ex==='undefined'?'undefined':_typeof(ex))==='object'&&'default'in ex?ex['default']:ex;}var _regeneratorRuntime=_interopDefault$1(regenerator);var _extends$1=_interopDefault$1(_extends);var _asyncToGenerator=_interopDefault$1(asyncToGenerator);var URL$1=_interopDefault$1(URL);var cheerio$1=_interopDefault$1(cheerio);var _Promise=_interopDefault$1(promise);var request$1=_interopDefault$1(request);var _Reflect$ownKeys$1=_interopDefault$1(_Reflect$ownKeys);var _toConsumableArray$1=_interopDefault$1(_toConsumableArray);var _defineProperty$1=_interopDefault$1(_defineProperty);var _slicedToArray$1=_interopDefault$1(_slicedToArray);var _typeof$1=_interopDefault$1(_typeof);var _getIterator$1=_interopDefault$1(_getIterator);var _Object$keys=_interopDefault$1(keys);var stringDirection$1=_interopDefault$1(stringDirection);var validUrl$1=_interopDefault$1(validUrl);var moment$1=_interopDefault$1(moment);var wuzzy$1=_interopDefault$1(wuzzy);var difflib$1=_interopDefault$1(difflib);var _Array$from=_interopDefault$1(from);var ellipsize$1=_interopDefault$1(ellipsize);var _marked=[range].map(_regeneratorRuntime.mark);function range(){var start=arguments.length>0&&arguments[0]!==undefined?arguments[0]:1;var end=arguments.length>1&&arguments[1]!==undefined?arguments[1]:1;return _regeneratorRuntime.wrap(function range$(_context){while(1){switch(_context.prev=_context.next){case 0:if(!(start<=end)){_context.next=5;break;}_context.next=3;return start+=1;case 3:_context.next=0;break;case 5:case"end":return _context.stop();}}},_marked[0],this);}// extremely simple url validation as a first step
+function validateUrl(_ref){var hostname=_ref.hostname;// If this isn't a valid url, return an error message
+return!!hostname;}var Errors={badUrl:{error:true,messages:'The url parameter passed does not look like a valid URL. Please check your data and try again.'}};var REQUEST_HEADERS={'User-Agent':'Readability - http://readability.com/about/'};// The number of milliseconds to attempt to fetch a resource before timing out.
+var FETCH_TIMEOUT=10000;// Content types that we do not extract content from
+var BAD_CONTENT_TYPES=['audio/mpeg','image/gif','image/jpeg','image/jpg'];var BAD_CONTENT_TYPES_RE=new RegExp('^('+BAD_CONTENT_TYPES.join('|')+')$','i');// Use this setting as the maximum size an article can be
+// for us to attempt parsing. Defaults to 5 MB.
+var MAX_CONTENT_LENGTH=5242880;// Turn the global proxy on or off
+// Proxying is not currently enabled in Python source
+// so not implementing logic in port.
+function get(options){// eslint-disable-line
+return new _Promise(function(resolve,reject){request$1(options,function(err,response,body){if(err){reject(err);}else{resolve({body:body,response:response});}});});}// Evaluate a response to ensure it's something we should be keeping.
+// This does not validate in the sense of a response being 200 level or
+// not. Validation here means that we haven't found reason to bail from
+// further processing of this url.
+function validateResponse(response){var parseNon2xx=arguments.length>1&&arguments[1]!==undefined?arguments[1]:false;// Check if we got a valid status code
+// This isn't great, but I'm requiring a statusMessage to be set
+// before short circuiting b/c nock doesn't set it in tests
+// statusMessage only not set in nock response, in which case
+// I check statusCode, which is currently only 200 for OK responses
+// in tests
+if(response.statusMessage&&response.statusMessage!=='OK'||response.statusCode!==200){if(!response.statusCode){throw new Error('Unable to fetch content. Original exception was '+response.error);}else if(!parseNon2xx){throw new Error('Resource returned a response status code of '+response.statusCode+' and resource was instructed to reject non-2xx level status codes.');}}var _response$headers=response.headers,contentType=_response$headers['content-type'],contentLength=_response$headers['content-length'];// Check that the content is not in BAD_CONTENT_TYPES
+if(BAD_CONTENT_TYPES_RE.test(contentType)){throw new Error('Content-type for this resource was '+contentType+' and is not allowed.');}// Check that the content length is below maximum
+if(contentLength>MAX_CONTENT_LENGTH){throw new Error('Content for this resource was too large. Maximum content length is '+MAX_CONTENT_LENGTH+'.');}return true;}// Grabs the last two pieces of the URL and joins them back together
+// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'
+// Set our response attribute to the result of fetching our URL.
+// TODO: This should gracefully handle timeouts and raise the
+// proper exceptions on the many failure cases of HTTP.
+// TODO: Ensure we are not fetching something enormous. Always return
+// unicode content for HTML, with charset conversion.
+var fetchResource$1=function(){var _ref2=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(url,parsedUrl){var options,_ref3,response,body;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:parsedUrl=parsedUrl||URL$1.parse(encodeURI(url));options={url:parsedUrl,headers:_extends$1({},REQUEST_HEADERS),timeout:FETCH_TIMEOUT,// Don't set encoding; fixes issues
+// w/gzipped responses
+encoding:null,// Accept cookies
+jar:true,// Accept and decode gzip
+gzip:true,// Follow any redirect
+followAllRedirects:true};_context.next=4;return get(options);case 4:_ref3=_context.sent;response=_ref3.response;body=_ref3.body;_context.prev=7;validateResponse(response);return _context.abrupt('return',{body:body,response:response});case 12:_context.prev=12;_context.t0=_context['catch'](7);return _context.abrupt('return',Errors.badUrl);case 15:case'end':return _context.stop();}}},_callee,this,[[7,12]]);}));function fetchResource(_x2,_x3){return _ref2.apply(this,arguments);}return fetchResource;}();function convertMetaProp($,from$$1,to){$('meta['+from$$1+']').each(function(_,node){var $node=$(node);var value=$node.attr(from$$1);$node.attr(to,value);$node.removeAttr(from$$1);});return $;}// For ease of use in extracting from meta tags,
+// replace the "content" attribute on meta tags with the
+// "value" attribute.
//
-// Strip out the big TLDs - it just makes the matching a bit more
-// accurate. Not the end of the world if it doesn't strip right.
-var _URL$parse=URL$1.parse(url),host=_URL$parse.host;var nakedDomain=host.replace(DOMAIN_ENDINGS_RE,'');var startSlug=splitTitle[0].toLowerCase().replace(' ','');var startSlugRatio=wuzzy$1.levenshtein(startSlug,nakedDomain);if(startSlugRatio>0.4&&startSlug.length>5){return splitTitle.slice(2).join('');}var endSlug=splitTitle.slice(-1)[0].toLowerCase().replace(' ','');var endSlugRatio=wuzzy$1.levenshtein(endSlug,nakedDomain);if(endSlugRatio>0.4&&endSlug.length>=5){return splitTitle.slice(0,-2).join('');}return null;}// Given a title with separators in it (colons, dashes, etc),
-// resolve whether any of the segments should be removed.
-function resolveSplitTitle(title){var url=arguments.length>1&&arguments[1]!==undefined?arguments[1]:'';// Splits while preserving splitters, like:
-// ['The New New York', ' - ', 'The Washington Post']
-var splitTitle=title.split(TITLE_SPLITTERS_RE);if(splitTitle.length===1){return title;}var newTitle=extractBreadcrumbTitle(splitTitle,title);if(newTitle)return newTitle;newTitle=cleanDomainFromTitle(splitTitle,url);if(newTitle)return newTitle;// Fuzzy ratio didn't find anything, so this title is probably legit.
-// Just return it all.
-return title;}var Cleaners={author:cleanAuthor,lead_image_url:clean$1,dek:cleanDek,date_published:cleanDatePublished,content:extractCleanNode,title:cleanTitle$$1};// Using a variety of scoring techniques, extract the content most
-// likely to be article text.
+// In addition, normalize 'property' attributes to 'name' for ease of
+// querying later. See, e.g., og or twitter meta tags.
+function normalizeMetaTags($){$=convertMetaProp($,'content','value');$=convertMetaProp($,'property','name');return $;}// Spacer images to be removed
+var SPACER_RE$1=new RegExp('trans|transparent|spacer|blank','i');// The class we will use to mark elements we want to keep
+// but would normally remove
+var KEEP_CLASS$1='mercury-parser-keep';var KEEP_SELECTORS$1=['iframe[src^="https://www.youtube.com"]','iframe[src^="http://www.youtube.com"]','iframe[src^="https://player.vimeo"]','iframe[src^="http://player.vimeo"]'];// A list of tags to strip from the output if we encounter them.
+var STRIP_OUTPUT_TAGS$1=['title','script','noscript','link','style','hr','embed','iframe','object'];// cleanAttributes
+var REMOVE_ATTRS$1=['style','align'];var REMOVE_ATTR_SELECTORS$1=REMOVE_ATTRS$1.map(function(selector){return'['+selector+']';});var REMOVE_ATTR_LIST$1=REMOVE_ATTRS$1.join(',');var WHITELIST_ATTRS$1=['src','srcset','href','class','id','alt'];var WHITELIST_ATTRS_RE$1=new RegExp('^('+WHITELIST_ATTRS$1.join('|')+')$','i');// removeEmpty
+var REMOVE_EMPTY_TAGS$1=['p'];var REMOVE_EMPTY_SELECTORS$1=REMOVE_EMPTY_TAGS$1.map(function(tag){return tag+':empty';}).join(',');// cleanTags
+var CLEAN_CONDITIONALLY_TAGS$1=['ul','ol','table','div','button','form'].join(',');// cleanHeaders
+var HEADER_TAGS$1=['h2','h3','h4','h5','h6'];var HEADER_TAG_LIST$1=HEADER_TAGS$1.join(',');// // CONTENT FETCHING CONSTANTS ////
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST$2=['ad-break','adbox','advert','addthis','agegate','aux','blogger-labels','combx','comment','conversation','disqus','entry-unrelated','extra','foot',// 'form', // This is too generic, has too many false positives
+'header','hidden','loader','login',// Note: This can hit 'blogindex'.
+'menu','meta','nav','outbrain','pager','pagination','predicta',// readwriteweb inline ad box
+'presence_control_external',// lifehacker.com container full of false positives
+'popup','printfriendly','related','remove','remark','rss','share','shoutbox','sidebar','sociable','sponsor','taboola','tools'];// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
//
-// If strip_unlikely_candidates is True, remove any elements that
-// match certain criteria first. (Like, does this element have a
-// classname of "comment")
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST$2=['and','article','body','blogindex','column','content','entry-content-asset','format',// misuse of form
+'hfeed','hentry','hatom','main','page','posts','shadow'];// A list of tags which, if found inside, should cause a to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into tags.
+var DIV_TO_P_BLOCK_TAGS$2=['a','blockquote','dl','div','img','p','pre','table'].join(',');// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
+// A list of selectors that specify, very clearly, either hNews or other
+// very content-specific style content, like Blogger templates.
+// More examples here: http://microformats.org/wiki/blog-post-formats
+// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
//
-// If weight_nodes is True, use classNames and IDs to determine the
-// worthiness of nodes.
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS$2=['article','articlecontent','instapaper_body','blog','body','content','entry-content-asset','entry','hentry','main','Normal','page','pagination','permalink','post','story','text','[-_]copy',// usatoday
+'\\Bcopy'];// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE$2=new RegExp(POSITIVE_SCORE_HINTS$2.join('|'),'i');// Readability publisher-specific guidelines
+// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
//
-// Returns a cheerio object $
-function extractBestNode($,opts){// clone the node so we can get back to our
-// initial parsed state if needed
-// TODO Do I need this? – AP
-// let $root = $.root().clone()
-if(opts.stripUnlikelyCandidates){$=stripUnlikelyCandidates($);}$=convertToParagraphs$$1($);$=scoreContent$$1($,opts.weightNodes);var $topCandidate=findTopCandidate$$1($);return $topCandidate;}var GenericContentExtractor={defaultOpts:{stripUnlikelyCandidates:true,weightNodes:true,cleanConditionally:true},// Extract the content for this resource - initially, pass in our
-// most restrictive opts which will return the highest quality
-// content. On each failure, retry with slightly more lax opts.
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS$2=['adbox','advert','author','bio','bookmark','bottom','byline','clear','com-','combx','comment','comment\\B','contact','copy','credit','crumb','date','deck','excerpt','featured',// tnr.com has a featured_content which throws us off
+'foot','footer','footnote','graf','head','info','infotext',// newscientist.com copyright
+'instapaper_ignore','jump','linebreak','link','masthead','media','meta','modal','outbrain',// slate.com junk
+'promo','pr_',// autoblog - press release
+'related','respond','roundcontent',// lifehacker restricted content warning
+'scroll','secondary','share','shopping','shoutbox','side','sidebar','sponsor','stamp','sub','summary','tags','tools','widget'];// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE$2=new RegExp(NEGATIVE_SCORE_HINTS$2.join('|'),'i');// XPath to try to determine if a page is wordpress. Not always successful.
+var IS_WP_SELECTOR$1='meta[name=generator][value^=WordPress]';// Match a digit. Pretty clear.
+// A list of words that, if found in link text or URLs, likely mean that
+// this link is not a next page link.
+// Match any phrase that looks like it could be page, or paging, or pagination
+var PAGE_RE$1=new RegExp('pag(e|ing|inat)','i');// Match any link text/classname/id that looks like it could mean the next
+// page. Things like: next, continue, >, >>, » but not >|, »| as those can
+// mean last page.
+// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
+// Match any link text/classname/id that looks like it is an end link: things
+// like "first", "last", "end", etc.
+// Match any link text/classname/id that looks like it means the previous
+// page.
+// Match 2 or more consecutive tags
+// Match 1 BR tag.
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
+var BLOCK_LEVEL_TAGS$2=['article','aside','blockquote','body','br','button','canvas','caption','col','colgroup','dd','div','dl','dt','embed','fieldset','figcaption','figure','footer','form','h1','h2','h3','h4','h5','h6','header','hgroup','hr','li','map','object','ol','output','p','pre','progress','section','table','tbody','textarea','tfoot','th','thead','tr','ul','video'];var BLOCK_LEVEL_TAGS_RE$2=new RegExp('^('+BLOCK_LEVEL_TAGS$2.join('|')+')$','i');// The removal is implemented as a blacklist and whitelist, this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression-both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist$2=UNLIKELY_CANDIDATES_BLACKLIST$2.join('|');var CANDIDATES_BLACKLIST$2=new RegExp(candidatesBlacklist$2,'i');var candidatesWhitelist$2=UNLIKELY_CANDIDATES_WHITELIST$2.join('|');var CANDIDATES_WHITELIST$2=new RegExp(candidatesWhitelist$2,'i');function stripUnlikelyCandidates$1($){// Loop through the provided document and remove any non-link nodes
+// that are unlikely candidates for article content.
//
-// :param return_type: string. If "node", should return the content
-// as a cheerio node rather than as an HTML string.
+// Links are ignored because there are very often links to content
+// that are identified as non-body-content, but may be inside
+// article-like content.
//
-// Opts:
-// stripUnlikelyCandidates: Remove any elements that match
-// non-article-like criteria first.(Like, does this element
-// have a classname of "comment")
+// :param $: a cheerio object to strip nodes from
+// :return $: the cleaned cheerio object
+$('*').not('a').each(function(index,node){var $node=$(node);var classes=$node.attr('class');var id=$node.attr('id');if(!id&&!classes)return;var classAndId=(classes||'')+' '+(id||'');if(CANDIDATES_WHITELIST$2.test(classAndId)){return;}else if(CANDIDATES_BLACKLIST$2.test(classAndId)){$node.remove();}});return $;}// ## NOTES:
+// Another good candidate for refactoring/optimizing.
+// Very imperative code, I don't love it. - AP
+// Given cheerio object, convert consecutive tags into
+// tags instead.
//
-// weightNodes: Modify an elements score based on whether it has
-// certain classNames or IDs. Examples: Subtract if a node has
-// a className of 'comment', Add if a node has an ID of
-// 'entry-content'.
+// :param $: A cheerio object
+function brsToPs$$1($){var collapsing=false;$('br').each(function(index,element){var $element=$(element);var nextElement=$element.next().get(0);if(nextElement&&nextElement.tagName.toLowerCase()==='br'){collapsing=true;$element.remove();}else if(collapsing){collapsing=false;// $(element).replaceWith('')
+paragraphize$1(element,$,true);}});return $;}// Given a node, turn it into a P if it is not already a P, and
+// make sure it conforms to the constraints of a P tag (I.E. does
+// not contain any other block tags.)
//
-// cleanConditionally: Clean the node to return of some
-// superfluous content. Things like forms, ads, etc.
-extract:function extract(_ref,opts){var $=_ref.$,html=_ref.html,title=_ref.title,url=_ref.url;opts=_extends$1({},this.defaultOpts,opts);$=$||cheerio$1.load(html);// Cascade through our extraction-specific opts in an ordered fashion,
-// turning them off as we try to extract content.
-var node=this.getContentNode($,title,url,opts);if(nodeIsSufficient(node)){return this.cleanAndReturnNode(node,$);}// We didn't succeed on first pass, one by one disable our
-// extraction opts and try again.
-var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(_Reflect$ownKeys$1(opts).filter(function(k){return opts[k]===true;})),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var key=_step.value;opts[key]=false;$=cheerio$1.load(html);node=this.getContentNode($,title,url,opts);if(nodeIsSufficient(node)){break;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return this.cleanAndReturnNode(node,$);},// Get node given current options
-getContentNode:function getContentNode($,title,url,opts){return extractCleanNode(extractBestNode($,opts),{$:$,cleanConditionally:opts.cleanConditionally,title:title,url:url});},// Once we got here, either we're at our last-resort node, or
-// we broke early. Make sure we at least have -something- before we
-// move forward.
-cleanAndReturnNode:function cleanAndReturnNode(node,$){if(!node){return null;}return normalizeSpaces($.html(node));// if return_type == "html":
-// return normalize_spaces(node_to_html(node))
-// else:
-// return node
-}};// TODO: It would be great if we could merge the meta and selector lists into
-// a list of objects, because we could then rank them better. For example,
-// .hentry .entry-title is far better suited than .
-// An ordered list of meta tag names that denote likely article titles. All
-// attributes should be lowercase for faster case-insensitive matching. From
-// most distinct to least distinct.
-var STRONG_TITLE_META_TAGS=['tweetmeme-title','dc.title','rbtitle','headline','title'];// og:title is weak because it typically contains context that we don't like,
-// for example the source site's name. Gotta get that brand into facebook!
-var WEAK_TITLE_META_TAGS=['og:title'];// An ordered list of XPath Selectors to find likely article titles. From
-// most explicit to least explicit.
+// If the node is a , it treats the following inline siblings
+// as if they were its children.
//
-// Note - this does not use classes like CSS. This checks to see if the string
-// exists in the className, which is not as accurate as .className (which
-// splits on spaces/endlines), but for our purposes it's close enough. The
-// speed tradeoff is worth the accuracy hit.
-var STRONG_TITLE_SELECTORS=['.hentry .entry-title','h1#articleHeader','h1.articleHeader','h1.article','.instapaper_title','#meebo-title'];var WEAK_TITLE_SELECTORS=['article h1','#entry-title','.entry-title','#entryTitle','#entrytitle','.entryTitle','.entrytitle','#articleTitle','.articleTitle','post post-title','h1.title','h2.article','h1','html head title','title'];var GenericTitleExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,metaCache=_ref.metaCache;// First, check to see if we have a matching meta tag that we can make
-// use of that is strongly associated with the headline.
-var title=void 0;title=extractFromMeta$$1($,STRONG_TITLE_META_TAGS,metaCache);if(title)return cleanTitle$$1(title,{url:url,$:$});// Second, look through our content selectors for the most likely
-// article title that is strongly associated with the headline.
-title=extractFromSelectors$$1($,STRONG_TITLE_SELECTORS);if(title)return cleanTitle$$1(title,{url:url,$:$});// Third, check for weaker meta tags that may match.
-title=extractFromMeta$$1($,WEAK_TITLE_META_TAGS,metaCache);if(title)return cleanTitle$$1(title,{url:url,$:$});// Last, look for weaker selector tags that may match.
-title=extractFromSelectors$$1($,WEAK_TITLE_SELECTORS);if(title)return cleanTitle$$1(title,{url:url,$:$});// If no matches, return an empty string
-return'';}};// An ordered list of meta tag names that denote likely article authors. All
-// attributes should be lowercase for faster case-insensitive matching. From
-// most distinct to least distinct.
-//
-// Note: "author" is too often the -developer- of the page, so it is not
-// added here.
-var AUTHOR_META_TAGS=['byl','clmst','dc.author','dcsext.author','dc.creator','rbauthors','authors'];var AUTHOR_MAX_LENGTH=300;// An ordered list of XPath Selectors to find likely article authors. From
-// most explicit to least explicit.
-//
-// Note - this does not use classes like CSS. This checks to see if the string
-// exists in the className, which is not as accurate as .className (which
-// splits on spaces/endlines), but for our purposes it's close enough. The
-// speed tradeoff is worth the accuracy hit.
-var AUTHOR_SELECTORS=['.entry .entry-author','.author.vcard .fn','.author .vcard .fn','.byline.vcard .fn','.byline .vcard .fn','.byline .by .author','.byline .by','.byline .author','.post-author.vcard','.post-author .vcard','a[rel=author]','#by_author','.by_author','#entryAuthor','.entryAuthor','.byline a[href*=author]','#author .authorname','.author .authorname','#author','.author','.articleauthor','.ArticleAuthor','.byline'];// An ordered list of Selectors to find likely article authors, with
-// regular expression for content.
-var bylineRe=/^[\n\s]*By/i;var BYLINE_SELECTORS_RE=[['#byline',bylineRe],['.byline',bylineRe]];var GenericAuthorExtractor={extract:function extract(_ref){var $=_ref.$,metaCache=_ref.metaCache;var author=void 0;// First, check to see if we have a matching
-// meta tag that we can make use of.
-author=extractFromMeta$$1($,AUTHOR_META_TAGS,metaCache);if(author&&author.length2&&arguments[2]!==undefined?arguments[2]:false;var $node=$(node);if(br){var sibling=node.nextSibling;var p=$('');// while the next node is text or not a block level element
+// append it to a new p node
+while(sibling&&!(sibling.tagName&&BLOCK_LEVEL_TAGS_RE$2.test(sibling.tagName))){var nextSibling=sibling.nextSibling;$(sibling).appendTo(p);sibling=nextSibling;}$node.replaceWith(p);$node.remove();return $;}return $;}function convertDivs$1($){$('div').each(function(index,div){var $div=$(div);var convertable=$div.children(DIV_TO_P_BLOCK_TAGS$2).length===0;if(convertable){convertNodeTo$$1($div,$,'p');}});return $;}function convertSpans$2($){$('span').each(function(index,span){var $span=$(span);var convertable=$span.parents('p, div').length===0;if(convertable){convertNodeTo$$1($span,$,'p');}});return $;}// Loop through the provided doc, and convert any p-like elements to
+// actual paragraph tags.
//
-// return None
-// An ordered list of meta tag names that denote likely article leading images.
-// All attributes should be lowercase for faster case-insensitive matching.
-// From most distinct to least distinct.
-var LEAD_IMAGE_URL_META_TAGS=['og:image','twitter:image','image_src'];var LEAD_IMAGE_URL_SELECTORS=['link[rel=image_src]'];var POSITIVE_LEAD_IMAGE_URL_HINTS=['upload','wp-content','large','photo','wp-image'];var POSITIVE_LEAD_IMAGE_URL_HINTS_RE=new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'),'i');var NEGATIVE_LEAD_IMAGE_URL_HINTS=['spacer','sprite','blank','throbber','gradient','tile','bg','background','icon','social','header','hdr','advert','spinner','loader','loading','default','rating','share','facebook','twitter','theme','promo','ads','wp-includes'];var NEGATIVE_LEAD_IMAGE_URL_HINTS_RE=new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'),'i');var GIF_RE=/\.gif(\?.*)?$/i;var JPG_RE=/\.jpe?g(\?.*)?$/i;function getSig($node){return($node.attr('class')||'')+' '+($node.attr('id')||'');}// Scores image urls based on a variety of heuristics.
-function scoreImageUrl(url){url=url.trim();var score=0;if(POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)){score+=20;}if(NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)){score-=20;}// TODO: We might want to consider removing this as
-// gifs are much more common/popular than they once were
-if(GIF_RE.test(url)){score-=10;}if(JPG_RE.test(url)){score+=10;}// PNGs are neutral.
-return score;}// Alt attribute usually means non-presentational image.
-function scoreAttr($img){if($img.attr('alt')){return 5;}return 0;}// Look through our parent and grandparent for figure-like
-// container elements, give a bonus if we find them
-function scoreByParents($img){var score=0;var $figParent=$img.parents('figure').first();if($figParent.length===1){score+=25;}var $parent=$img.parent();var $gParent=void 0;if($parent.length===1){$gParent=$parent.parent();}[$parent,$gParent].forEach(function($node){if(PHOTO_HINTS_RE$1.test(getSig($node))){score+=15;}});return score;}// Look at our immediate sibling and see if it looks like it's a
-// caption. Bonus if so.
-function scoreBySibling($img){var score=0;var $sibling=$img.next();var sibling=$sibling.get(0);if(sibling&&sibling.tagName==='figcaption'){score+=25;}if(PHOTO_HINTS_RE$1.test(getSig($sibling))){score+=15;}return score;}function scoreByDimensions($img){var score=0;var width=parseFloat($img.attr('width'));var height=parseFloat($img.attr('height'));var src=$img.attr('src');// Penalty for skinny images
-if(width&&width<=50){score-=50;}// Penalty for short images
-if(height&&height<=50){score-=50;}if(width&&height&&!src.includes('sprite')){var area=width*height;if(area<5000){// Smaller than 50 x 100
-score-=100;}else{score+=Math.round(area/1000);}}return score;}function scoreByPosition($imgs,index){return $imgs.length/2-index;}// Given a resource, try to find the lead image URL from within
-// it. Like content and next page extraction, uses a scoring system
-// to determine what the most likely image may be. Short circuits
-// on really probable things like og:image meta tags.
+// Things fitting this criteria:
+// * Multiple consecutive tags.
+// * tags without block level elements inside of them
+// * tags who are not children of or tags.
//
-// Potential signals to still take advantage of:
-// * domain
-// * weird aspect ratio
-var GenericLeadImageUrlExtractor={extract:function extract(_ref){var $=_ref.$,content=_ref.content,metaCache=_ref.metaCache;var cleanUrl=void 0;// Check to see if we have a matching meta tag that we can make use of.
-// Moving this higher because common practice is now to use large
-// images on things like Open Graph or Twitter cards.
-// images usually have for things like Open Graph.
-var imageUrl=extractFromMeta$$1($,LEAD_IMAGE_URL_META_TAGS,metaCache,false);if(imageUrl){cleanUrl=clean$1(imageUrl);if(cleanUrl)return cleanUrl;}// Next, try to find the "best" image via the content.
-// We'd rather not have to fetch each image and check dimensions,
-// so try to do some analysis and determine them instead.
-var imgs=$('img',content).toArray();var imgScores={};imgs.forEach(function(img,index){var $img=$(img);var src=$img.attr('src');if(!src)return;var score=scoreImageUrl(src);score+=scoreAttr($img);score+=scoreByParents($img);score+=scoreBySibling($img);score+=scoreByDimensions($img);score+=scoreByPosition(imgs,index);imgScores[src]=score;});var _Reflect$ownKeys$redu=_Reflect$ownKeys$1(imgScores).reduce(function(acc,key){return imgScores[key]>acc[1]?[key,imgScores[key]]:acc;},[null,0]),_Reflect$ownKeys$redu2=_slicedToArray$1(_Reflect$ownKeys$redu,2),topUrl=_Reflect$ownKeys$redu2[0],topScore=_Reflect$ownKeys$redu2[1];if(topScore>0){cleanUrl=clean$1(topUrl);if(cleanUrl)return cleanUrl;}// If nothing else worked, check to see if there are any really
-// probable nodes in the doc, like .
-var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(LEAD_IMAGE_URL_SELECTORS),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var selector=_step.value;var $node=$(selector).first();var src=$node.attr('src');if(src){cleanUrl=clean$1(src);if(cleanUrl)return cleanUrl;}var href=$node.attr('href');if(href){cleanUrl=clean$1(href);if(cleanUrl)return cleanUrl;}var value=$node.attr('value');if(value){cleanUrl=clean$1(value);if(cleanUrl)return cleanUrl;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}};// def extract(self):
-// """
-// # First, try to find the "best" image via the content.
-// # We'd rather not have to fetch each image and check dimensions,
-// # so try to do some analysis and determine them instead.
-// content = self.extractor.extract_content(return_type="node")
-// imgs = content.xpath('.//img')
-// img_scores = defaultdict(int)
-// logger.debug('Scoring %d images from content', len(imgs))
-// for (i, img) in enumerate(imgs):
-// img_score = 0
+// :param $: A cheerio object to search
+// :return cheerio object with new p elements
+// (By-reference mutation, though. Returned just for convenience.)
function convertToParagraphs$$1($) {
  // Each pass mutates the document in place and hands it back.
  $ = brsToPs$$1($);
  $ = convertDivs$1($);
  $ = convertSpans$2($);
  return $;
}

// Replace $node with a new element of type `tag` (default 'p') that keeps
// the node's attributes and inner content.
//
// :param $node: selection whose first element is converted
// :param $: the document object; returned unchanged for chaining
// :param tag: (optional) tag name of the replacement element
// :return $
function convertNodeTo$$1($node, $) {
  var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
  var node = $node.get(0);
  if (!node) {
    return $;
  }

  var attrs = getAttrs$1(node) || {};
  // Quote and escape every value so multi-token attributes such as
  // class="a b" survive the round trip through the HTML parser.
  var attribString = _Reflect$ownKeys$1(attrs).map(function (key) {
    var value = String(attrs[key]).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
    return key + '="' + value + '"';
  }).join(' ');

  var html = void 0;
  if ($.browser) {
    // In the browser, the contents of noscript tags aren't rendered, therefore
    // transforms on the noscript tag (commonly used for lazy-loading) don't work
    // as expected. This test case handles that
    html = node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();
  } else {
    html = $node.contents();
  }

  // The closing tag needs its "</" prefix; without it the markup is malformed.
  $node.replaceWith('<' + tag + ' ' + attribString + '>' + html + '</' + tag + '>');
  return $;
}

// Drop tiny images and strip explicit heights from the rest.
//
// :param $img: selection wrapping a single image
// :param $: the document object; returned unchanged for chaining
function cleanForHeight$1($img, $) {
  var height = parseInt($img.attr('height'), 10);
  var width = parseInt($img.attr('width'), 10) || 20;

  // Remove images that explicitly have very small heights or
  // widths, because they are most likely shims or icons,
  // which aren't very useful for reading.
  if ((height || 20) < 10 || width < 10) {
    $img.remove();
  } else if (height) {
    // Don't ever specify a height on images, so that we can
    // scale with respect to width without screwing up the
    // aspect ratio.
    $img.removeAttr('height');
  }
  return $;
}
// Cleans out images where the source string matches transparent/spacer/etc
+// TODO This seems very aggressive - AP
+function removeSpacers$1($img,$){if(SPACER_RE$1.test($img.attr('src'))){$img.remove();}return $;}function cleanImages$1($article,$){$article.find('img').each(function(index,img){var $img=$(img);cleanForHeight$1($img,$);removeSpacers$1($img,$);});return $;}function markToKeep$1(article,$,url){var tags=arguments.length>3&&arguments[3]!==undefined?arguments[3]:[];if(tags.length===0){tags=KEEP_SELECTORS$1;}if(url){var _URL$parse=URL$1.parse(url),protocol=_URL$parse.protocol,hostname=_URL$parse.hostname;tags=[].concat(_toConsumableArray$1(tags),['iframe[src^="'+protocol+'//'+hostname+'"]']);}$(tags.join(','),article).addClass(KEEP_CLASS$1);return $;}function stripJunkTags$1(article,$){var tags=arguments.length>2&&arguments[2]!==undefined?arguments[2]:[];if(tags.length===0){tags=STRIP_OUTPUT_TAGS$1;}// Remove matching elements, but ignore
+// any element with a class of mercury-parser-keep
+$(tags.join(','),article).not('.'+KEEP_CLASS$1).remove();// Remove the mercury-parser-keep class from result
+$('.'+KEEP_CLASS$1,article).removeClass(KEEP_CLASS$1);return $;}// H1 tags are typically the article title, which should be extracted
+// by the title extractor instead. If there's less than 3 of them (<3),
+// strip them. Otherwise, turn 'em into H2s.
+function cleanHOnes$$1(article,$){var $hOnes=$('h1',article);if($hOnes.length<3){$hOnes.each(function(index,node){return $(node).remove();});}else{$hOnes.each(function(index,node){convertNodeTo$$1($(node),$,'h2');});}return $;}function removeAllButWhitelist$1($article){$article.find('*').each(function(index,node){var attrs=getAttrs$1(node);setAttrs$1(node,_Reflect$ownKeys$1(attrs).reduce(function(acc,attr){if(WHITELIST_ATTRS_RE$1.test(attr)){return _extends$1({},acc,_defineProperty$1({},attr,attrs[attr]));}return acc;},{}));});return $article;}// function removeAttrs(article, $) {
+// REMOVE_ATTRS.forEach((attr) => {
+// $(`[${attr}]`, article).removeAttr(attr);
+// });
+// }
+// Remove attributes like style or align
+function cleanAttributes$$1($article){// Grabbing the parent because at this point
+// $article will be wrapped in a div which will
+// have a score set on it.
+return removeAllButWhitelist$1($article.parent().length?$article.parent():$article);}function removeEmpty$1($article,$){$article.find('p').each(function(index,p){var $p=$(p);if($p.find('iframe, img').length===0&&$p.text().trim()==='')$p.remove();});return $;}// // CONTENT FETCHING CONSTANTS ////
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST$1$1=['ad-break','adbox','advert','addthis','agegate','aux','blogger-labels','combx','comment','conversation','disqus','entry-unrelated','extra','foot','form','header','hidden','loader','login',// Note: This can hit 'blogindex'.
+'menu','meta','nav','pager','pagination','predicta',// readwriteweb inline ad box
+'presence_control_external',// lifehacker.com container full of false positives
+'popup','printfriendly','related','remove','remark','rss','share','shoutbox','sidebar','sociable','sponsor','tools'];// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
//
-// if not 'src' in img.attrib:
-// logger.debug('No src attribute found')
-// continue
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST$1$1=['and','article','body','blogindex','column','content','entry-content-asset','format',// misuse of form
+'hfeed','hentry','hatom','main','page','posts','shadow'];// A list of tags which, if found inside, should cause a to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into <p /> tags.
+var DIV_TO_P_BLOCK_TAGS$1$1=['a','blockquote','dl','div','img','p','pre','table'].join(',');// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
+var NON_TOP_CANDIDATE_TAGS$1$1=['br','b','i','label','hr','area','base','basefont','input','img','link','meta'];var NON_TOP_CANDIDATE_TAGS_RE$1$1=new RegExp('^('+NON_TOP_CANDIDATE_TAGS$1$1.join('|')+')$','i');// A list of selectors that specify, very clearly, either hNews or other
+// very content-specific style content, like Blogger templates.
+// More examples here: http://microformats.org/wiki/blog-post-formats
+var HNEWS_CONTENT_SELECTORS$1$1=[['.hentry','.entry-content'],['entry','.entry-content'],['.entry','.entry_content'],['.post','.postbody'],['.post','.post_body'],['.post','.post-body']];var PHOTO_HINTS$1$1=['figure','photo','image','caption'];var PHOTO_HINTS_RE$1$1=new RegExp(PHOTO_HINTS$1$1.join('|'),'i');// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
//
-// try:
-// parsed_img = urlparse(img.attrib['src'])
-// img_path = parsed_img.path.lower()
-// except ValueError:
-// logger.debug('ValueError getting img path.')
-// continue
-// logger.debug('Image path is %s', img_path)
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS$1$1=['article','articlecontent','instapaper_body','blog','body','content','entry-content-asset','entry','hentry','main','Normal','page','pagination','permalink','post','story','text','[-_]copy',// usatoday
+'\\Bcopy'];// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE$1$1=new RegExp(POSITIVE_SCORE_HINTS$1$1.join('|'),'i');// Readability publisher-specific guidelines
+var READABILITY_ASSET$1$1=new RegExp('entry-content-asset','i');// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
//
-// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
-// logger.debug('Positive URL hints match. Adding 20.')
-// img_score += 20
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS$1$1=['adbox','advert','author','bio','bookmark','bottom','byline','clear','com-','combx','comment','comment\\B','contact','copy','credit','crumb','date','deck','excerpt','featured',// tnr.com has a featured_content which throws us off
+'foot','footer','footnote','graf','head','info','infotext',// newscientist.com copyright
+'instapaper_ignore','jump','linebreak','link','masthead','media','meta','modal','outbrain',// slate.com junk
+'promo','pr_',// autoblog - press release
+'related','respond','roundcontent',// lifehacker restricted content warning
+'scroll','secondary','share','shopping','shoutbox','side','sidebar','sponsor','stamp','sub','summary','tags','tools','widget'];// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE$1$1=new RegExp(NEGATIVE_SCORE_HINTS$1$1.join('|'),'i');// Match a digit. Pretty clear.
+// Match 2 or more consecutive <br /> tags
+// Match 1 BR tag.
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
+// The removal is implemented as a blacklist and whitelist, this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression-both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist$1$1=UNLIKELY_CANDIDATES_BLACKLIST$1$1.join('|');var candidatesWhitelist$1$1=UNLIKELY_CANDIDATES_WHITELIST$1$1.join('|');var PARAGRAPH_SCORE_TAGS$1$1=new RegExp('^(p|li|span|pre)$','i');var CHILD_CONTENT_TAGS$1$1=new RegExp('^(td|blockquote|ol|ul|dl)$','i');var BAD_TAGS$1$1=new RegExp('^(address|form)$','i');// Get the score of a node based on its className and id.
+function getWeight$1(node){var classes=node.attr('class');var id=node.attr('id');var score=0;if(id){// if id exists, try to score on both positive and negative
+if(POSITIVE_SCORE_RE$1$1.test(id)){score+=25;}if(NEGATIVE_SCORE_RE$1$1.test(id)){score-=25;}}if(classes){if(score===0){// if classes exist and id did not contribute to score
+// try to score on both positive and negative
+if(POSITIVE_SCORE_RE$1$1.test(classes)){score+=25;}if(NEGATIVE_SCORE_RE$1$1.test(classes)){score-=25;}}// even if score has been set by id, add score for
+// possible photo matches
+// "try to keep photos if we can"
+if(PHOTO_HINTS_RE$1$1.test(classes)){score+=10;}// add 25 if class matches entry-content-asset,
+// a class apparently instructed for use in the
+// Readability publisher guidelines
+// https://www.readability.com/developers/guidelines
+if(READABILITY_ASSET$1$1.test(classes)){score+=25;}}return score;}// returns the score of a node based on
+// the node's score attribute
+// returns null if no score set
+function getScore$1($node){// console.log("NODE", $node, $node.attr('score'))
+return parseFloat($node.attr('score'))||null;}// return 1 for every comma in text
+function scoreCommas$1(text){return(text.match(/,/g)||[]).length;}var idkRe$1=new RegExp('^(p|pre)$','i');function scoreLength$1(textLength){var tagName=arguments.length>1&&arguments[1]!==undefined?arguments[1]:'p';var chunks=textLength/50;if(chunks>0){var lengthBonus=void 0;// No idea why p or pre are being tamped down here
+// but just following the source for now
+// Not even sure why tagName is included here,
+// since this is only being called from the context
+// of scoreParagraph
+if(idkRe$1.test(tagName)){lengthBonus=chunks-2;}else{lengthBonus=chunks-1.25;}return Math.min(Math.max(lengthBonus,0),3);}return 0;}// Score a paragraph using various methods. Things like number of
+// commas, etc. Higher is better.
+function scoreParagraph$$1(node){var score=1;var text=node.text().trim();var textLength=text.length;// If this paragraph is less than 25 characters, don't count it.
+if(textLength<25){return 0;}// Add points for any commas within this paragraph
+score+=scoreCommas$1(text);// For every 50 characters in this paragraph, add another point. Up
+// to 3 points.
+score+=scoreLength$1(textLength);// Articles can end with short paragraphs when people are being clever
+// but they can also end with short paragraphs setting up lists of junk
+// that we strip. This negative tweaks junk setup paragraphs just below
+// the cutoff threshold.
+if(text.slice(-1)===':'){score-=1;}return score;}function setScore$1($node,$,score){$node.attr('score',score);return $node;}function addScore$$1($node,$,amount){try{var score=getOrInitScore$$1($node,$)+amount;setScore$1($node,$,score);}catch(e){// Ignoring; error occurs in scoreNode
+}return $node;}// Adds 1/4 of a child's score to its parent
function addToParent$$1(node, $, score) {
  var parent = node.parent();
  // A selection object is always truthy, so test its length. Scoring an
  // empty parent would only raise an error that addScore then swallows.
  if (parent && parent.length > 0) {
    addScore$$1(parent, $, score * 0.25);
  }
  return node;
}

// gets and returns the score if it exists
// if not, initializes a score based on
// the node's tag type
//
// :param $node: selection to score
// :param $: the document object, passed through to addScore
// :param weightNodes: (optional, default true) also add the class/id weight
// :return the node's score (a number)
function getOrInitScore$$1($node, $) {
  var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;

  var score = getScore$1($node);
  if (score) {
    return score;
  }

  score = scoreNode$$1($node);
  if (weightNodes) {
    score += getWeight$1($node);
  }

  // A freshly initialised score also contributes a quarter to the parent.
  addToParent$$1($node, $, score);
  return score;
}
// Score an individual node. Has some smarts for paragraphs, otherwise
+// just scores based on tag.
+function scoreNode$$1($node){var _$node$get=$node.get(0),tagName=_$node$get.tagName;// TODO: Consider ordering by most likely.
+// E.g., if divs are a more common tag on a page,
+// Could save doing that regex test on every node – AP
+if(PARAGRAPH_SCORE_TAGS$1$1.test(tagName)){return scoreParagraph$$1($node);}else if(tagName.toLowerCase()==='div'){return 5;}else if(CHILD_CONTENT_TAGS$1$1.test(tagName)){return 3;}else if(BAD_TAGS$1$1.test(tagName)){return-3;}else if(tagName.toLowerCase()==='th'){return-5;}return 0;}function convertSpans$1$1($node,$){if($node.get(0)){var _$node$get=$node.get(0),tagName=_$node$get.tagName;if(tagName==='span'){// convert spans to divs
+convertNodeTo$$1($node,$,'div');}}}function addScoreTo$1($node,$,score){if($node){convertSpans$1$1($node,$);addScore$$1($node,$,score);}}function scorePs$1($,weightNodes){$('p, pre').not('[score]').each(function(index,node){// The raw score for this paragraph, before we add any parent/child
+// scores.
+var $node=$(node);$node=setScore$1($node,$,getOrInitScore$$1($node,$,weightNodes));var $parent=$node.parent();var rawScore=scoreNode$$1($node);addScoreTo$1($parent,$,rawScore,weightNodes);if($parent){// Add half of the individual content score to the
+// grandparent
+addScoreTo$1($parent.parent(),$,rawScore/2,weightNodes);}});return $;}// score content. Parents get the full value of their children's
+// content score, grandparents half
+function scoreContent$$1($){var weightNodes=arguments.length>1&&arguments[1]!==undefined?arguments[1]:true;// First, look for special hNews based selectors and give them a big
+// boost, if they exist
+HNEWS_CONTENT_SELECTORS$1$1.forEach(function(_ref){var _ref2=_slicedToArray$1(_ref,2),parentSelector=_ref2[0],childSelector=_ref2[1];$(parentSelector+' '+childSelector).each(function(index,node){addScore$$1($(node).parent(parentSelector),$,80);});});// Doubling this again
+// Previous solution caused a bug
+// in which parents weren't retaining
+// scores. This is not ideal, and
+// should be fixed.
+scorePs$1($,weightNodes);scorePs$1($,weightNodes);return $;}var NORMALIZE_RE$1=/\s{2,}/g;function normalizeSpaces$1(text){return text.replace(NORMALIZE_RE$1,' ').trim();}// Given a node type to search for, and a list of regular expressions,
+// look to see if this extraction can be found in the URL. Expects
+// that each expression in r_list will return group(1) as the proper
+// string to be cleaned.
+// Only used for date_published currently.
// Return capture group 1 of the first expression in regexList that matches
// url, or null when none match.
//
// :param url: the URL string to search
// :param regexList: array of RegExp objects, tried in order
// :return the captured string, or null
function extractFromUrl$1(url, regexList) {
  for (var i = 0; i < regexList.length; i += 1) {
    var re = regexList[i];
    // Global/sticky expressions remember where the last match ended; start
    // from the beginning and run a single exec instead of test + exec so a
    // stateful pattern cannot match once and then come back null.
    re.lastIndex = 0;
    var match = re.exec(url);
    re.lastIndex = 0;
    if (match) {
      return match[1];
    }
  }
  return null;
}
// An expression that looks to try to find the page digit within a URL, if
+// it exists.
+// Matches:
+// page=1
+// pg=1
+// p=1
+// paging=12
+// pag=7
+// pagination/1
+// paging/88
+// pa/83
+// p/11
//
-// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
-// logger.debug('Negative URL hints match. Subtracting 20.')
-// img_score -= 20
+// Does not match:
+// pg=102
+// page:2
var PAGE_IN_HREF_RE$1 = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
var HAS_ALPHA_RE$1 = /[a-z]/i;
var IS_ALPHA_RE$1 = /^[a-z]+$/i;
var IS_DIGIT_RE$1 = /^[0-9]+$/i;

// Pull the page number out of a URL.
//
// :param url: the URL string to inspect
// :return the page number when one is found and is below 100, else null
function pageNumFromUrl$1(url) {
  var matches = url.match(PAGE_IN_HREF_RE$1);
  if (!matches) return null;

  // Group 6 holds the digits that follow the "=" or "/" separator.
  var pageNum = parseInt(matches[6], 10);

  // Return pageNum < 100, otherwise
  // return null
  return pageNum < 100 ? pageNum : null;
}

// Drop the fragment ("#...") from a URL, then a single trailing slash.
function removeAnchor$1(url) {
  return url.split('#')[0].replace(/\/$/, '');
}

// Decide whether a URL path segment should be kept when building the
// article base URL.
//
// :param segment: one path segment
// :param index: position of the segment, counted from the END of the path
// :param firstSegmentHasLetters: whether the segment at index 0 has letters
// :return false when the segment should be removed, true otherwise
function isGoodSegment$1(segment, index, firstSegmentHasLetters) {
  // If this is purely a number, and it's the first or second
  // url_segment, it's probably a page number. Remove it.
  if (index < 2 && IS_DIGIT_RE$1.test(segment) && segment.length < 3) {
    return false;
  }

  // If this is the first url_segment and it's just "index",
  // remove it
  if (index === 0 && segment.toLowerCase() === 'index') {
    return false;
  }

  // If our first or second url_segment is smaller than 3 characters,
  // and the first url_segment had no alphas, remove it.
  if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
    return false;
  }

  return true;
}
// Take a URL, and return the article base of said URL. That is, no
+// pagination data exists in it. Useful for comparing to other links
+// that might have pagination data within them.
+function articleBaseUrl$1(url,parsed){var parsedUrl=parsed||URL$1.parse(url);var protocol=parsedUrl.protocol,host=parsedUrl.host,path=parsedUrl.path;var firstSegmentHasLetters=false;var cleanedSegments=path.split('/').reverse().reduce(function(acc,rawSegment,index){var segment=rawSegment;// Split off and save anything that looks like a file type.
+if(segment.includes('.')){var _segment$split=segment.split('.'),_segment$split2=_slicedToArray$1(_segment$split,2),possibleSegment=_segment$split2[0],fileExt=_segment$split2[1];if(IS_ALPHA_RE$1.test(fileExt)){segment=possibleSegment;}}// If our first or second segment has anything looking like a page
+// number, remove it.
+if(PAGE_IN_HREF_RE$1.test(segment)&&index<2){segment=segment.replace(PAGE_IN_HREF_RE$1,'');}// If we're on the first segment, check to see if we have any
+// characters in it. The first segment is actually the last bit of
+// the URL, and this will be helpful to determine if we're on a URL
+// segment that looks like "/2/" for example.
+if(index===0){firstSegmentHasLetters=HAS_ALPHA_RE$1.test(segment);}// If it's not marked for deletion, push it to cleaned_segments.
+if(isGoodSegment$1(segment,index,firstSegmentHasLetters)){acc.push(segment);}return acc;},[]);return protocol+'//'+host+cleanedSegments.reverse().join('/');}// Given a string, return True if it appears to have an ending sentence
+// within it, false otherwise.
// A literal period followed by a space or the end of the string. The dot
// must be escaped; an unescaped "." matches any character, which would make
// every non-empty string look like it ends a sentence.
var SENTENCE_END_RE$1 = /\.( |$)/;

function hasSentenceEnd$1(text) {
  return SENTENCE_END_RE$1.test(text);
}

// Return the first `words` whitespace-separated words of content, joined
// by single spaces.
//
// :param content: the text to excerpt
// :param words: (optional, default 10) maximum number of words to keep
function excerptContent$1(content) {
  var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;
  return content.trim().split(/\s+/).slice(0, words).join(' ');
}
// Now that we have a top_candidate, look through the siblings of
+// it to see if any of them are decently scored. If they are, they
+// may be split parts of the content (Like two divs, a preamble and
+// a body.) Example:
+// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
// Collect $candidate and any sufficiently content-like siblings into a new
// wrapper div.
//
// :param $candidate: selection holding the top-scoring node
// :param topScore: the candidate's score
// :param $: the document object, used to wrap nodes and create the div
// :return $candidate itself when it has no parent or no sibling qualifies,
//         otherwise the wrapper div holding the merged nodes
function mergeSiblings$1($candidate, topScore, $) {
  if (!$candidate.parent().length) {
    return $candidate;
  }

  var siblingScoreThreshold = Math.max(10, topScore * 0.25);
  // Must be real markup: an empty string selects nothing, so every append
  // below would be thrown away.
  var wrappingDiv = $('<div></div>');

  $candidate.parent().children().each(function (index, sibling) {
    var $sibling = $(sibling);

    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE$1$1.test(sibling.tagName)) {
      return;
    }

    // Siblings that were never scored are not considered at all.
    var siblingScore = getScore$1($sibling);
    if (!siblingScore) {
      return;
    }

    if ($sibling.get(0) === $candidate.get(0)) {
      wrappingDiv.append($sibling);
      return;
    }

    var contentBonus = 0;
    var density = linkDensity$1($sibling);

    // If sibling has a very low link density,
    // give it a small bonus
    if (density < 0.05) {
      contentBonus += 20;
    }

    // If sibling has a high link density,
    // give it a penalty
    if (density >= 0.5) {
      contentBonus -= 20;
    }

    // If sibling node has the same class as
    // candidate, give it a bonus
    if ($sibling.attr('class') === $candidate.attr('class')) {
      contentBonus += topScore * 0.2;
    }

    if (siblingScore + contentBonus >= siblingScoreThreshold) {
      wrappingDiv.append($sibling);
      return;
    }

    // Compare case-insensitively: browser DOM nodes report "P".
    if (String(sibling.tagName).toLowerCase() !== 'p') {
      return;
    }

    // Low-scoring paragraphs are still kept when they read like prose:
    // long with few links, or short, link-free and ending a sentence.
    var siblingContent = $sibling.text();
    var siblingContentLength = textLength$1(siblingContent);
    var isLongProse = siblingContentLength > 80 && density < 0.25;
    var isShortSentence = siblingContentLength <= 80 && density === 0 && hasSentenceEnd$1(siblingContent);
    if (isLongProse || isShortSentence) {
      wrappingDiv.append($sibling);
    }
  });

  // Nothing but the candidate was collected, so skip the extra wrapper.
  if (wrappingDiv.children().length === 1 && wrappingDiv.children().first().get(0) === $candidate.get(0)) {
    return $candidate;
  }

  return wrappingDiv;
}
// After we've calculated scores, loop through all of the possible
+// candidate nodes we found and find the one with the highest score.
// Pick the highest-scoring scored node and merge in its content-like
// siblings.
//
// :param $: the document object
// :return the merged top candidate; when nothing has a positive score, the
//         body, or the document's first element if there is no body
function findTopCandidate$$1($) {
  var $candidate = void 0;
  var topScore = 0;

  $('[score]').each(function (index, node) {
    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE$1$1.test(node.tagName)) {
      return;
    }

    var $node = $(node);
    var score = getScore$1($node);
    if (score > topScore) {
      topScore = score;
      $candidate = $node;
    }
  });

  // If we don't have a candidate, return the body
  // or whatever the first element is
  if (!$candidate) {
    // A selection is truthy even when empty, so check its length before
    // falling back to the first element.
    var $body = $('body');
    return $body.length > 0 ? $body : $('*').first();
  }

  $candidate = mergeSiblings$1($candidate, topScore, $);
  return $candidate;
}
// Scoring
+function removeUnlessContent$1($node,$,weight){// Explicitly save entry-content-asset tags, which are
+// noted as valuable in the Publisher guidelines. For now
+// this works everywhere. We may want to consider making
+// this less of a sure-thing later.
+if($node.hasClass('entry-content-asset')){return;}var content=normalizeSpaces$1($node.text());if(scoreCommas$1(content)<10){var pCount=$('p',$node).length;var inputCount=$('input',$node).length;// Looks like a form, too many inputs.
+if(inputCount>pCount/3){$node.remove();return;}var contentLength=content.length;var imgCount=$('img',$node).length;// Content is too short, and there are no images, so
+// this is probably junk content.
+if(contentLength<25&&imgCount===0){$node.remove();return;}var density=linkDensity$1($node);// Too high of link density, is probably a menu or
+// something similar.
+// console.log(weight, density, contentLength)
+if(weight<25&&density>0.2&&contentLength>75){$node.remove();return;}// Too high of a link density, despite the score being
+// high.
+if(weight>=25&&density>0.5){// Don't remove the node if it's a list and the
+// previous sibling starts with a colon though. That
+// means it's probably content.
+var tagName=$node.get(0).tagName.toLowerCase();var nodeIsList=tagName==='ol'||tagName==='ul';if(nodeIsList){var previousNode=$node.prev();if(previousNode&&normalizeSpaces$1(previousNode.text()).slice(-1)===':'){return;}}$node.remove();return;}var scriptCount=$('script',$node).length;// Too many script tags, not enough content.
+if(scriptCount>0&&contentLength<150){$node.remove();return;}}}// Given an article, clean it of some superfluous content specified by
+// tags. Things like forms, ads, etc.
//
-// # Gifs are more often structure than photos
-// if img_path.endswith('gif'):
-// logger.debug('gif found. Subtracting 10.')
-// img_score -= 10
+// Tags is an array of tag name's to search through. (like div, form,
+// etc)
//
-// # JPGs are more often photographs
-// if img_path.endswith('jpg'):
-// logger.debug('jpg found. Adding 10.')
-// img_score += 10
+// Return this same doc.
+function cleanTags$$1($article,$){$(CLEAN_CONDITIONALLY_TAGS$1,$article).each(function(index,node){var $node=$(node);var weight=getScore$1($node);if(!weight){weight=getOrInitScore$$1($node,$);setScore$1($node,$,weight);}// drop node if its weight is < 0
+if(weight<0){$node.remove();}else{// deteremine if node seems like content
+removeUnlessContent$1($node,$,weight);}});return $;}function cleanHeaders$1($article,$){var title=arguments.length>2&&arguments[2]!==undefined?arguments[2]:'';$(HEADER_TAG_LIST$1,$article).each(function(index,header){var $header=$(header);// Remove any headers that appear before all other p tags in the
+// document. This probably means that it was part of the title, a
+// subtitle or something else extraneous like a datestamp or byline,
+// all of which should be handled by other metadata handling.
+if($($header,$article).prevAll('p').length===0){return $header.remove();}// Remove any headers that match the title exactly.
+if(normalizeSpaces$1($(header).text())===title){return $header.remove();}// If this header has a negative weight, it's probably junk.
+// Get rid of it.
+if(getWeight$1($(header))<0){return $header.remove();}return $header;});return $;}// Rewrite the tag name to div if it's a top level node like body or
+// html to avoid later complications with multiple body tags.
+function rewriteTopLevel$$1(article,$){// I'm not using context here because
+// it's problematic when converting the
+// top-level/root node - AP
+$=convertNodeTo$$1($('html'),$,'div');$=convertNodeTo$$1($('body'),$,'div');return $;}/* eslint-disable */function absolutize$1($,rootUrl,attr,$content){$('['+attr+']',$content).each(function(_,node){var attrs=getAttrs$1(node);var url=attrs[attr];if(url){var absoluteUrl=URL$1.resolve(rootUrl,url);setAttr$1(node,attr,absoluteUrl);}});}function makeLinksAbsolute$$1($content,$,url){['href','src'].forEach(function(attr){return absolutize$1($,url,attr,$content);});return $content;}function textLength$1(text){return text.trim().replace(/\s+/g,' ').length;}// Determines what percentage of the text
+// in a node is link text
+// Takes a node, returns a float
+function linkDensity$1($node){var totalTextLength=textLength$1($node.text());var linkText=$node.find('a').text();var linkLength=textLength$1(linkText);if(totalTextLength>0){return linkLength/totalTextLength;}else if(totalTextLength===0&&linkLength>0){return 1;}return 0;}// Given a node type to search for, and a list of meta tag names to
+// search for, find a meta tag associated.
+function extractFromMeta$$1($,metaNames,cachedNames){var cleanTags$$1=arguments.length>3&&arguments[3]!==undefined?arguments[3]:true;var foundNames=metaNames.filter(function(name){return cachedNames.indexOf(name)!==-1;});var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{var _loop=function _loop(){var name=_step.value;var type='name';var value='value';var nodes=$('meta['+type+'="'+name+'"]');// Get the unique value of every matching node, in case there
+// are two meta tags with the same name and value.
+// Remove empty values.
+var values=nodes.map(function(index,node){return $(node).attr(value);}).toArray().filter(function(text){return text!=='';});// If we have more than one value for the same name, we have a
+// conflict and can't trust any of them. Skip this name. If we have
+// zero, that means our meta tags had no values. Skip this name
+// also.
+if(values.length===1){var metaValue=void 0;// Meta values that contain HTML should be stripped, as they
+// weren't subject to cleaning previously.
+if(cleanTags$$1){metaValue=stripTags$1(values[0],$);}else{metaValue=values[0];}return{v:metaValue};}};for(var _iterator=_getIterator$1(foundNames),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var _ret=_loop();if((typeof _ret==='undefined'?'undefined':_typeof$1(_ret))==="object")return _ret.v;}// If nothing is found, return null
+}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}function isGoodNode$1($node,maxChildren){// If it has a number of children, it's more likely a container
+// element. Skip it.
+if($node.children().length>maxChildren){return false;}// If it looks to be within a comment, skip it.
+if(withinComment$$1($node)){return false;}return true;}// Given a a list of selectors find content that may
+// be extractable from the document. This is for flat
+// meta-information, like author, title, date published, etc.
+function extractFromSelectors$$1($,selectors){var maxChildren=arguments.length>2&&arguments[2]!==undefined?arguments[2]:1;var textOnly=arguments.length>3&&arguments[3]!==undefined?arguments[3]:true;var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(selectors),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var selector=_step.value;var nodes=$(selector);// If we didn't get exactly one of this selector, this may be
+// a list of articles or comments. Skip it.
+if(nodes.length===1){var $node=$(nodes[0]);if(isGoodNode$1($node,maxChildren)){var content=void 0;if(textOnly){content=$node.text();}else{content=$node.html();}if(content){return content;}}}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}// strips all tags from a string of text
// :param text: string that may contain HTML
// :param $: the document object, used to parse the text
// :return the text content, or the original text when stripping leaves nothing
function stripTags$1(text, $) {
  // Wrapping text in html element prevents errors when text
  // has no html
  var cleanText = $('<span>' + text + '</span>').text();
  return cleanText === '' ? text : cleanText;
}

// Report whether any ancestor of $node has "comment" in its class or id.
//
// :param $node: selection whose ancestors are inspected
// :return boolean
function withinComment$$1($node) {
  return $node.parents().toArray().some(function (parent) {
    // Fall back to an empty object when the parent exposes no attributes.
    var attrs = getAttrs$1(parent) || {};
    var classAndId = attrs.class + ' ' + attrs.id;
    return classAndId.includes('comment');
  });
}
// Given a node, determine if it's article-like enough to return
+// param: node (a cheerio node)
+// return: boolean
+function nodeIsSufficient$1($node){return $node.text().trim().length>=100;}function isWordpress$1($){return $(IS_WP_SELECTOR$1).length>0;}function getAttrs$1(node){var attribs=node.attribs,attributes=node.attributes;if(!attribs&&attributes){var attrs=_Reflect$ownKeys$1(attributes).reduce(function(acc,index){var attr=attributes[index];acc[attr.name]=attr.value;return acc;},{});return attrs;}return attribs;}function setAttr$1(node,attr,val){if(node.attribs){node.attribs[attr]=val;}else if(node.attributes){node.setAttribute(attr,val);}return node;}/* eslint-disable */function setAttrs$1(node,attrs){if(node.attribs){node.attribs=attrs;}else if(node.attributes){while(node.attributes.length>0){node.removeAttribute(node.attributes[0].name);}_Reflect$ownKeys$1(attrs).forEach(function(key){node.setAttribute(key,attrs[key]);});}return node;}// DOM manipulation
+var IS_LINK=new RegExp('https?://','i');var IS_IMAGE=new RegExp('.(png|gif|jpe?g)','i');var TAGS_TO_REMOVE=['script','style','form'].join(',');// Convert all instances of images with potentially
+// lazy loaded images into normal images.
+// Many sites will have img tags with no source, or an image tag with a src
+// attribute that a is a placeholer. We need to be able to properly fill in
+// the src attribute so the images are no longer lazy loaded.
+function convertLazyLoadedImages($){$('img').each(function(_,img){var attrs=getAttrs$1(img);_Reflect$ownKeys$1(attrs).forEach(function(attr){var value=attrs[attr];if(attr!=='src'&&IS_LINK.test(value)&&IS_IMAGE.test(value)){$(img).attr('src',value);}});});return $;}function isComment(index,node){return node.type==='comment';}function cleanComments($){$('*').first().contents().filter(isComment).remove();return $;}function clean($){$(TAGS_TO_REMOVE).remove();$=cleanComments($);return $;}var Resource={// Create a Resource.
//
-// # PNGs are neutral.
+// :param url: The URL for the document we should retrieve.
+// :param response: If set, use as the response rather than
+// attempting to fetch it ourselves. Expects a
+// string.
+create:function create(url,preparedResponse,parsedUrl){var _this=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var result,validResponse;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:result=void 0;if(!preparedResponse){_context.next=6;break;}validResponse={statusMessage:'OK',statusCode:200,headers:{'content-type':'text/html','content-length':500}};result={body:preparedResponse,response:validResponse};_context.next=9;break;case 6:_context.next=8;return fetchResource$1(url,parsedUrl);case 8:result=_context.sent;case 9:if(!result.error){_context.next=11;break;}return _context.abrupt('return',result);case 11:return _context.abrupt('return',_this.generateDoc(result));case 12:case'end':return _context.stop();}}},_callee,_this);}))();},generateDoc:function generateDoc(_ref){var content=_ref.body,response=_ref.response;var contentType=response.headers['content-type'];// TODO: Implement is_text function from
+// https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
+if(!contentType.includes('html')&&!contentType.includes('text')){throw new Error('Content does not appear to be text.');}var $=cheerio$1.load(content,{normalizeWhitespace:true});if($('*').first().children().length===0){throw new Error('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return $;}};var merge=function merge(extractor,domains){return domains.reduce(function(acc,domain){acc[domain]=extractor;return acc;},{});};function mergeSupportedDomains(extractor){return extractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}var BloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
+// initially in the page, but it's all there
+// in noscript
+selectors:['.post-content noscript'],// Selectors to remove from the extracted content
+clean:[],// Convert the noscript tag to a div
+transforms:{noscript:'div'}},author:{selectors:['.post-author-name']},title:{selectors:['.post h2.title']},date_published:{selectors:['span.publishdate']}};var NYMagExtractor={domain:'nymag.com',content:{// Order by most likely. Extractor will stop on first occurrence
+selectors:['div.article-content','section.body','article.article'],// Selectors to remove from the extracted content
+clean:['.ad','.single-related-story'],// Object of transformations to make on matched elements
+// Each key is the selector, each value is the tag to
+// transform to.
+// If a function is given, it should return a string
+// to convert to or nothing (in which case it will not perform
+// the transformation).
+transforms:{// Convert h1s to h2s
+h1:'h2',// Convert lazy-loaded noscript images to figures
+noscript:function noscript($node,$){if($.browser){var $children=$($node.text());if($children.length===1&&$children.get(0)!==undefined&&$children.get(0).tagName.toLowerCase()==='img'){return'figure';}}else{var _$children=$node.children();if(_$children.length===1&&_$children.get(0).tagName==='img'){return'figure';}}return null;}}},title:{selectors:['h1.lede-feature-title','h1.headline-primary','h1']},author:{selectors:['.by-authors','.lede-feature-author']},dek:{selectors:['.lede-feature-teaser']},date_published:{selectors:[['time.article-timestamp[datetime]','datetime'],'time.article-timestamp']}};var WikipediaExtractor={domain:'wikipedia.org',content:{selectors:['#mw-content-text'],defaultCleaner:false,// transform top infobox to an image with caption
+transforms:{'.infobox img':function infoboxImg($node){var $parent=$node.parents('.infobox');// Only prepend the first image in .infobox
+if($parent.children('img').length===0){$parent.prepend($node);}},'.infobox caption':'figcaption','.infobox':'figure'},// Selectors to remove from the extracted content
+clean:['.mw-editsection','figure tr, figure td, figure tbody','#toc','.navbox']},author:'Wikipedia Contributors',title:{selectors:['h2.title']},date_published:{selectors:['#footer-info-lastmod']}};var TwitterExtractor={domain:'twitter.com',content:{transforms:{// We're transforming essentially the whole page here.
+// Twitter doesn't have nice selectors, so our initial
+// selector grabs the whole page, then we're re-writing
+// it to fit our needs before we clean it up.
+'.permalink[role=main]':function permalinkRoleMain($node,$){var tweets=$node.find('.tweet');var $tweetContainer=$('');$tweetContainer.append(tweets);$node.replaceWith($tweetContainer);},// Twitter wraps @ with s, which
+// renders as a strikethrough
+s:'span'},selectors:['.permalink[role=main]'],defaultCleaner:false,clean:['.stream-item-footer','button','.tweet-details-fixer']},author:{selectors:['.tweet.permalink-tweet .username']},date_published:{selectors:[['.permalink-tweet ._timestamp[data-time-ms]','data-time-ms']]}};var NYTimesExtractor={domain:'www.nytimes.com',title:{selectors:['.g-headline','h1.headline']},author:{selectors:[['meta[name="author"]','value'],'.g-byline','.byline']},content:{selectors:['div.g-blocks','article#story'],defaultCleaner:false,transforms:{'img.g-lazy':function imgGLazy($node){var src=$node.attr('src');// const widths = $node.attr('data-widths')
+// .slice(1)
+// .slice(0, -1)
+// .split(',');
+// if (widths.length) {
+// width = widths.slice(-1);
+// } else {
+// width = '900';
+// }
+var width=640;src=src.replace('{{size}}',width);$node.attr('src',src);}},clean:['.ad','header#story-header','.story-body-1 .lede.video','.visually-hidden','#newsletter-promo','.promo','.comments-button','.hidden']},date_published:null,lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+var TheAtlanticExtractor={domain:'www.theatlantic.com',title:{selectors:['h1.hed']},author:{selectors:['article#article .article-cover-extra .metadata .byline a']},content:{selectors:['.article-body'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['time[itemProp="datePublished"]','datetime']]},lead_image_url:null,dek:null,next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var NewYorkerExtractor={domain:'www.newyorker.com',title:{selectors:['h1.title']},author:{selectors:['.contributors']},content:{selectors:['div#articleBody','div.articleBody'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var WiredExtractor={domain:'www.wired.com',title:{selectors:['h1.post-title']},author:{selectors:['a[rel="author"]']},content:{selectors:['article.content'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:['.visually-hidden']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var MSNExtractor={domain:'www.msn.com',title:{selectors:['h1']},author:{selectors:['span.authorname-txt']},content:{selectors:['div.richtext'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:['span.caption']},date_published:{selectors:['span.time']},lead_image_url:{selectors:[]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var YahooExtractor={domain:'www.yahoo.com',title:{selectors:['header.canvas-header']},author:{selectors:['span.provider-name']},content:{selectors:[// enter content selectors
+'.content-canvas'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:['.figure-caption']},date_published:{selectors:[['time.date[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var BuzzfeedExtractor={domain:'www.buzzfeed.com',title:{selectors:['h1[id="post-title"]']},author:{selectors:['a[data-action="user/username"]','.byline__author']},content:{selectors:['#buzz_sub_buzz'],defaultCleaner:false,// byline__author is a class name, so its selector needs the leading dot.
+// Transforms map a selector to the tag its matches are rewritten to (h2 becomes b).
+transforms:{h2:'b'},// Selectors to remove from the extracted content.
+// Anything matching one of these is dropped from
+// the result
+clean:['.instapaper_ignore','.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline','.share-box']},date_published:{selectors:['.buzz-datetime']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var WikiaExtractor={domain:'fandom.wikia.com',title:{selectors:['h1.entry-title']},author:{selectors:['.author.vcard','.fn']},content:{selectors:['.grid-content','.entry-content'],// The author selector targets one element carrying both the author and vcard
+// classes ('.author vcard' looked for a <vcard> tag). NOTE(review): confirm against live markup.
+transforms:[],// No site-specific transforms or clean selectors are
+// configured for this extractor; both lists are
+// intentionally empty.
+clean:[]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var LittleThingsExtractor={domain:'www.littlethings.com',title:{selectors:['h1.post-title']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:[// enter content selectors
+'.mainContentIntro','.content-wrapper'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},next_page_url:null,excerpt:null};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var PoliticoExtractor={domain:'www.politico.com',title:{selectors:[// enter title selectors
+['meta[name="og:title"]','value']]},author:{selectors:['.story-main-content .byline .vcard']},content:{selectors:[// enter content selectors
+'.story-main-content','.content-group','.story-core','.story-text'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:[],// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:['figcaption']},date_published:{selectors:[['.story-main-content .timestamp time[datetime]','datetime']]},lead_image_url:{selectors:[// enter lead_image_url selectors
+['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="description"]','value']]},next_page_url:null,excerpt:null};var DeadspinExtractor={domain:'deadspin.com',supportedDomains:['jezebel.com','lifehacker.com','kotaku.com','gizmodo.com','jalopnik.com','kinja.com'],title:{selectors:['h1.headline']},author:{selectors:['.author']},content:{selectors:['.post-content','.entry-content'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:{'iframe.lazyload[data-recommend-id^="youtube://"]':function iframeLazyloadDataRecommendIdYoutube($node){var youtubeId=$node.attr('id').split('youtube-')[1];$node.attr('src','https://www.youtube.com/embed/'+youtubeId);}},// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['time.updated[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
+]},next_page_url:{selectors:[// enter selectors
+]},excerpt:{selectors:[// enter selectors
+]}};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var BroadwayWorldExtractor={domain:'www.broadwayworld.com',title:{selectors:['h1.article-title']},author:{selectors:['span[itemprop=author]']},content:{selectors:['div[itemprop=articlebody]'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:{},// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['meta[itemprop=datePublished]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name="og:description"]','value']]},next_page_url:{selectors:[// enter selectors
+]},excerpt:{selectors:[// enter selectors
+]}};// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var ApartmentTherapyExtractor={domain:'www.apartmenttherapy.com',title:{selectors:['h1.headline']},author:{selectors:['.PostByline__name']},content:{selectors:['div.post__content'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:{'div[data-render-react-id="images/LazyPicture"]':function divDataRenderReactIdImagesLazyPicture($node,$){var data=JSON.parse($node.attr('data-props'));var src=data.sources[0].src;var $img=$('').attr('src',src);$node.replaceWith($img);}},// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['.PostByline__timestamp[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[['meta[name=description]','value']]},next_page_url:{selectors:[// enter selectors
+]},excerpt:{selectors:[// enter selectors
+]}};var MediumExtractor={domain:'medium.com',supportedDomains:['trackchanges.postlight.com'],title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:['.section-content'],// Is there anything in the content you selected that needs transformed
+// before it's consumable content? E.g., unusual lazy loaded images
+transforms:{// Re-write lazy-loaded youtube videos
+iframe:function iframe($node){var ytRe=/https:\/\/i.embed.ly\/.+url=https:\/\/i\.ytimg\.com\/vi\/(\w+)\//;var thumb=decodeURIComponent($node.attr('data-thumbnail'));if(ytRe.test(thumb)){var _thumb$match=thumb.match(ytRe),_thumb$match2=_slicedToArray$1(_thumb$match,2),_=_thumb$match2[0],youtubeId=_thumb$match2[1];// eslint-disable-line
+$node.attr('src','https://www.youtube.com/embed/'+youtubeId);var $parent=$node.parents('figure');$parent.prepend($node.clone());$node.remove();}}},// Is there anything that is in the result that shouldn't be?
+// The clean selectors will remove anything that matches from
+// the result
+clean:[]},date_published:{selectors:[['time[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[// enter selectors
+]},next_page_url:{selectors:[// enter selectors
+]},excerpt:{selectors:[// enter selectors
+]}};var CustomExtractors=_Object$freeze({BloggerExtractor:BloggerExtractor,NYMagExtractor:NYMagExtractor,WikipediaExtractor:WikipediaExtractor,TwitterExtractor:TwitterExtractor,NYTimesExtractor:NYTimesExtractor,TheAtlanticExtractor:TheAtlanticExtractor,NewYorkerExtractor:NewYorkerExtractor,WiredExtractor:WiredExtractor,MSNExtractor:MSNExtractor,YahooExtractor:YahooExtractor,BuzzfeedExtractor:BuzzfeedExtractor,WikiaExtractor:WikiaExtractor,LittleThingsExtractor:LittleThingsExtractor,PoliticoExtractor:PoliticoExtractor,DeadspinExtractor:DeadspinExtractor,BroadwayWorldExtractor:BroadwayWorldExtractor,ApartmentTherapyExtractor:ApartmentTherapyExtractor,MediumExtractor:MediumExtractor});var Extractors=_Object$keys(CustomExtractors).reduce(function(acc,key){var extractor=CustomExtractors[key];return _extends$1({},acc,mergeSupportedDomains(extractor));},{});// CLEAN AUTHOR CONSTANTS
+var CLEAN_AUTHOR_RE=/^\s*(posted |written )?by\s*:?\s*(.*)/i;// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
+// CLEAN DEK CONSTANTS
+var TEXT_LINK_RE=new RegExp('http(s)?://','i');// An ordered list of meta tag names that denote likely article deks.
+// From most distinct to least distinct.
//
-// # Alt attribute usually means non-presentational image.
-// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:
-// logger.debug('alt attribute found. Adding 5.')
-// img_score += 5
+// NOTE: There are currently no meta tags that seem to provide the right
+// content consistently enough. Two options were:
+// - og:description
+// - dc.description
+// However, these tags often have SEO-specific junk in them that's not
+// header-worthy like a dek is. Excerpt material at best.
+// An ordered list of Selectors to find likely article deks. From
+// most explicit to least explicit.
//
-// # Look through our parent and grandparent for figure-like
-// # container elements, give a bonus if we find them
-// parents = [img.getparent()]
-// if parents[0] is not None and parents[0].getparent() is not None:
-// parents.append(parents[0].getparent())
-// for p in parents:
-// if p.tag == 'figure':
-// logger.debug('Parent with <figure> tag found. Adding 25.')
-// img_score += 25
-//
-// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])
-// if constants.PHOTO_HINTS_RE.search(p_sig):
-// logger.debug('Photo hints regex match. Adding 15.')
-// img_score += 15
+// Should be more restrictive than not, as a failed dek can be pretty
+// detrimental to the aesthetics of an article.
+// CLEAN DATE PUBLISHED CONSTANTS
+var MS_DATE_STRING=/^\d{13}$/i;var SEC_DATE_STRING=/^\d{10}$/i;var CLEAN_DATE_STRING_RE=/^\s*published\s*:?\s*(.*)/i;var TIME_MERIDIAN_SPACE_RE=/(.*\d)(am|pm)(.*)/i;var TIME_MERIDIAN_DOTS_RE=/\.m\./i;var months=['jan','feb','mar','apr','may','jun','jul','aug','sep','oct','nov','dec'];var allMonths=months.join('|');var timestamp1='[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';var timestamp2='[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';var SPLIT_DATE_STRING=new RegExp('('+timestamp1+')|('+timestamp2+')|([0-9]{1,4})|('+allMonths+')','ig');// CLEAN TITLE CONSTANTS
+// A regular expression that will match separating characters on a
+// title, that usually denote breadcrumbs or something similar.
+/* No g flag: .test() must stay stateless between calls (a global regex carries lastIndex over), and split() splits on every match regardless. */var TITLE_SPLITTERS_RE=/(: | - | \| )/;/* Dots are escaped so only a literal trailing .com/.net/.org/.co.uk is stripped. */var DOMAIN_ENDINGS_RE=new RegExp('\\.com$|\\.net$|\\.org$|\\.co\\.uk$');// Take an author string (like 'By David Smith ') and clean it to
+// just the name(s): 'David Smith'.
+function cleanAuthor(author){/* Keep only what follows an optional "posted by"/"written by"/"by" prefix. */var nameOnly=author.replace(CLEAN_AUTHOR_RE,'$2');return nameOnly.trim();}function clean$1(leadImageUrl){/* Trim, then accept the value only when it validates as a web URI. */var candidate=leadImageUrl.trim();return validUrl$1.isWebUri(candidate)?candidate:null;}// Take a dek HTML fragment, and return the cleaned version of it.
+// Return None if the dek wasn't good enough.
+function cleanDek(dek,_ref){var $=_ref.$,excerpt=_ref.excerpt;var size=dek.length;// Reject a dek shorter than 5 or longer than 1000 characters.
+if(size<5||size>1000){return null;}// A dek whose opening matches the excerpt's opening is rejected.
+if(excerpt){var excerptStart=excerptContent$1(excerpt,10);if(excerptStart===excerptContent$1(dek,10)){return null;}}var plainDek=stripTags$1(dek,$);// A plain-text link in the stripped dek means it is
+// not a good dek - bail.
+return TEXT_LINK_RE.test(plainDek)?null:plainDek.trim();}// Is there a compelling reason to use moment here?
+// Mostly only being used for the isValid() method,
+// but could just check for 'Invalid Date' string.
+function cleanDateString(dateString){/* Keep only the date-like tokens, then normalize meridian dots and spacing and drop a leading "published" label. */var tokens=dateString.match(SPLIT_DATE_STRING)||[];var joined=tokens.join(' ');var undotted=joined.replace(TIME_MERIDIAN_DOTS_RE,'m');var spaced=undotted.replace(TIME_MERIDIAN_SPACE_RE,'$1 $2 $3');return spaced.replace(CLEAN_DATE_STRING_RE,'$1').trim();}// Take a date published string, and hopefully return a date out of
+// it. Return none if we fail.
+function cleanDatePublished(dateString){// Returns an ISO string, or null when no valid date can be derived. A 13-digit string is epoch milliseconds; a 10-digit string is epoch seconds and is scaled by 1000, because Date reads numbers as milliseconds.
+if(MS_DATE_STRING.test(dateString)){dateString=parseInt(dateString,10);}else if(SEC_DATE_STRING.test(dateString)){dateString=parseInt(dateString,10)*1000;}var date=moment$1(new Date(dateString));/* Direct parsing failed: normalize the string and retry once. */if(!date.isValid()){dateString=cleanDateString(dateString);date=moment$1(new Date(dateString));}return date.isValid()?date.toISOString():null;}// Clean our article content, returning a new, cleaned node.
+function extractCleanNode(article,_ref){var $=_ref.$,_ref$cleanConditional=_ref.cleanConditionally,cleanConditionally=_ref$cleanConditional===undefined?true:_ref$cleanConditional,_ref$title=_ref.title,title=_ref$title===undefined?'':_ref$title,_ref$url=_ref.url,url=_ref$url===undefined?'':_ref$url,_ref$defaultCleaner=_ref.defaultCleaner,defaultCleaner=_ref$defaultCleaner===undefined?true:_ref$defaultCleaner;// Rewrite the tag name to div if it's a top level node like body or
+// html to avoid later complications with multiple body tags.
+rewriteTopLevel$$1(article,$);// Drop small images and spacer images
+// Only do this if defaultCleaner is set to true;
+// this can sometimes be too aggressive.
+if(defaultCleaner)cleanImages$1(article,$);// Mark elements to keep that would normally be removed.
+// E.g., stripJunkTags will remove iframes, so we're going to mark
+// YouTube/Vimeo videos as elements we want to keep.
+markToKeep$1(article,$,url);// Drop certain tags like , etc
+// This is -mostly- for cleanliness, not security.
+stripJunkTags$1(article,$);// H1 tags are typically the article title, which should be extracted
+// by the title extractor instead. If there's less than 3 of them (<3),
+// strip them. Otherwise, turn 'em into H2s.
+cleanHOnes$$1(article,$);// Clean headers
+cleanHeaders$1(article,$,title);// Make links absolute
+makeLinksAbsolute$$1(article,$,url);// We used to clean UL's and OL's here, but it was leading to
+// too many in-article lists being removed. Consider a better
+// way to detect menus particularly and remove them.
+// Also optionally running, since it can be overly aggressive.
+if(defaultCleaner)cleanTags$$1(article,$,cleanConditionally);// Remove empty paragraph nodes
+removeEmpty$1(article,$);// Remove unnecessary attributes
+cleanAttributes$$1(article,$);return article;}function cleanTitle$$1(title,_ref){var url=_ref.url,$=_ref.$;// If title has |, :, or - in it, see if
+// we can clean it up.
+if(TITLE_SPLITTERS_RE.test(title)){title=resolveSplitTitle(title,url);}// Final sanity check that we didn't get a crazy title.
+// if (title.length > 150 || title.length < 15) {
+if(title.length>150){// If we did, return h1 from the document if it exists
+var h1=$('h1');if(h1.length===1){title=h1.text();}}// strip any html tags in the title text
+return stripTags$1(title,$).trim();}function extractBreadcrumbTitle(splitTitle,text){// This must be a very breadcrumbed title, like:
+// The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
+// NYTimes - Blogs - Bits - The Best Gadgets on Earth
+if(splitTitle.length>=6){var _ret=function(){// Look to see if we can find a breadcrumb splitter that happens
+// more than once. If we can, we'll be able to better pull out
+// the title.
+var termCounts=splitTitle.reduce(function(acc,titleText){acc[titleText]=acc[titleText]?acc[titleText]+1:1;return acc;},{});var _Reflect$ownKeys$redu=_Reflect$ownKeys$1(termCounts).reduce(function(acc,key){if(acc[1]> "
+// will match, but nothing longer than that.
+if(termCount>=2&&maxTerm.length<=4){splitTitle=text.split(maxTerm);}var splitEnds=[splitTitle[0],splitTitle.slice(-1)];var longestEnd=splitEnds.reduce(function(acc,end){return acc.length>end.length?acc:end;},'');if(longestEnd.length>10){return{v:longestEnd};}return{v:text};}();if((typeof _ret==='undefined'?'undefined':_typeof$1(_ret))==="object")return _ret.v;}return null;}function cleanDomainFromTitle(splitTitle,url){// Search the ends of the title, looking for bits that fuzzy match
+// the URL too closely. If one is found, discard it and return the
+// rest.
//
-// # Look at our immediate sibling and see if it looks like it's a
-// # caption. Bonus if so.
-// sibling = img.getnext()
-// if sibling is not None:
-// if sibling.tag == 'figcaption':
-// img_score += 25
+// Strip out the big TLDs - it just makes the matching a bit more
+// accurate. Not the end of the world if it doesn't strip right.
+var _URL$parse=URL$1.parse(url),host=_URL$parse.host;var nakedDomain=host.replace(DOMAIN_ENDINGS_RE,'');var startSlug=splitTitle[0].toLowerCase().replace(' ','');var startSlugRatio=wuzzy$1.levenshtein(startSlug,nakedDomain);if(startSlugRatio>0.4&&startSlug.length>5){return splitTitle.slice(2).join('');}var endSlug=splitTitle.slice(-1)[0].toLowerCase().replace(' ','');var endSlugRatio=wuzzy$1.levenshtein(endSlug,nakedDomain);if(endSlugRatio>0.4&&endSlug.length>=5){return splitTitle.slice(0,-2).join('');}return null;}// Given a title with separators in it (colons, dashes, etc),
+// resolve whether any of the segments should be removed.
+function resolveSplitTitle(title){var url=arguments[1]===undefined?'':arguments[1];// Break the title apart while keeping the separators, like:
+// ['The New New York', ' - ', 'The Washington Post']
+var pieces=title.split(TITLE_SPLITTERS_RE);if(pieces.length===1){return title;}// Try the breadcrumb heuristic, then the domain heuristic; if neither yields
+// a title, this one is probably legit, so return it all.
+return extractBreadcrumbTitle(pieces,title)||cleanDomainFromTitle(pieces,url)||title;}var Cleaners={author:cleanAuthor,lead_image_url:clean$1,dek:cleanDek,date_published:cleanDatePublished,content:extractCleanNode,title:cleanTitle$$1};// Using a variety of scoring techniques, extract the content most
+// likely to be article text.
//
-// sib_sig = ' '.join([sibling.get('id', ''),
-// sibling.get('class', '')]).lower()
-// if 'caption' in sib_sig:
-// img_score += 15
+// If strip_unlikely_candidates is True, remove any elements that
+// match certain criteria first. (Like, does this element have a
+// classname of "comment")
//
-// # Pull out width/height if they were set.
-// img_width = None
-// img_height = None
-// if 'width' in img.attrib:
-// try:
-// img_width = float(img.get('width'))
-// except ValueError:
-// pass
-// if 'height' in img.attrib:
-// try:
-// img_height = float(img.get('height'))
-// except ValueError:
-// pass
+// If weight_nodes is True, use classNames and IDs to determine the
+// worthiness of nodes.
//
-// # Penalty for skinny images
-// if img_width and img_width <= 50:
-// logger.debug('Skinny image found. Subtracting 50.')
-// img_score -= 50
+// Returns a cheerio object $
+function extractBestNode($,opts){// clone the node so we can get back to our
+// initial parsed state if needed
+// TODO Do I need this? – AP
+// let $root = $.root().clone()
+if(opts.stripUnlikelyCandidates){$=stripUnlikelyCandidates$1($);}$=convertToParagraphs$$1($);$=scoreContent$$1($,opts.weightNodes);var $topCandidate=findTopCandidate$$1($);return $topCandidate;}var GenericContentExtractor={defaultOpts:{stripUnlikelyCandidates:true,weightNodes:true,cleanConditionally:true},// Extract the content for this resource - initially, pass in our
+// most restrictive opts which will return the highest quality
+// content. On each failure, retry with slightly more lax opts.
//
-// # Penalty for short images
-// if img_height and img_height <= 50:
-// # Wide, short images are more common than narrow, tall ones
-// logger.debug('Short image found. Subtracting 25.')
-// img_score -= 25
+// :param return_type: string. If "node", should return the content
+// as a cheerio node rather than as an HTML string.
//
-// if img_width and img_height and not 'sprite' in img_path:
-// area = img_width * img_height
+// Opts:
+// stripUnlikelyCandidates: Remove any elements that match
+// non-article-like criteria first.(Like, does this element
+// have a classname of "comment")
//
-// if area < 5000: # Smaller than 50x100
-// logger.debug('Image with small area found. Subtracting 100.')
-// img_score -= 100
-// else:
-// img_score += round(area/1000.0)
+// weightNodes: Modify an elements score based on whether it has
+// certain classNames or IDs. Examples: Subtract if a node has
+// a className of 'comment', Add if a node has an ID of
+// 'entry-content'.
//
-// # If the image is higher on the page than other images,
-// # it gets a bonus. Penalty if lower.
-// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)
-// img_score += len(imgs)/2 - i
+// cleanConditionally: Strip superfluous content from the node
+// before returning it. Things like forms, ads, etc.
+extract:function extract(_ref,opts){var $=_ref.$,html=_ref.html,title=_ref.title,url=_ref.url,cheerio$$1=_ref.cheerio;opts=_extends$1({},this.defaultOpts,opts);$=$||cheerio$$1.load(html);// Cascade through our extraction-specific opts in an ordered fashion,
+// turning them off as we try to extract content.
+var node=this.getContentNode($,title,url,opts);if(nodeIsSufficient$1(node)){return this.cleanAndReturnNode(node,$);}// We didn't succeed on first pass, one by one disable our
+// extraction opts and try again.
+var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(_Reflect$ownKeys$1(opts).filter(function(k){return opts[k]===true;})),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var key=_step.value;opts[key]=false;$=cheerio$$1.load(html);node=this.getContentNode($,title,url,opts);if(nodeIsSufficient$1(node)){break;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return this.cleanAndReturnNode(node,$);},// Get node given current options
+getContentNode:function getContentNode($,title,url,opts){return extractCleanNode(extractBestNode($,opts),{$:$,cleanConditionally:opts.cleanConditionally,title:title,url:url});},// Once we got here, either we're at our last-resort node, or
+// we broke early. Make sure we at least have -something- before we
+// move forward.
+cleanAndReturnNode:function cleanAndReturnNode(node,$){if(!node){return null;}return normalizeSpaces$1($.html(node));// if return_type == "html":
+// return normalize_spaces(node_to_html(node))
+// else:
+// return node
+}};// TODO: It would be great if we could merge the meta and selector lists into
+// a list of objects, because we could then rank them better. For example,
+// .hentry .entry-title is far better suited than a generic meta title tag.
+// An ordered list of meta tag names that denote likely article titles. All
+// attributes should be lowercase for faster case-insensitive matching. From
+// most distinct to least distinct.
+var STRONG_TITLE_META_TAGS=['tweetmeme-title','dc.title','rbtitle','headline','title'];// og:title is weak because it typically contains context that we don't like,
+// for example the source site's name. Gotta get that brand into facebook!
+var WEAK_TITLE_META_TAGS=['og:title'];// An ordered list of XPath Selectors to find likely article titles. From
+// most explicit to least explicit.
//
-// # Use the raw src here because we munged img_path for case
-// # insensitivity
-// logger.debug('Final score is %d.', img_score)
-// img_scores[img.attrib['src']] += img_score
+// Note - this does not use classes like CSS. This checks to see if the string
+// exists in the className, which is not as accurate as .className (which
+// splits on spaces/endlines), but for our purposes it's close enough. The
+// speed tradeoff is worth the accuracy hit.
+var STRONG_TITLE_SELECTORS=['.hentry .entry-title','h1#articleHeader','h1.articleHeader','h1.article','.instapaper_title','#meebo-title'];var WEAK_TITLE_SELECTORS=['article h1','#entry-title','.entry-title','#entryTitle','#entrytitle','.entryTitle','.entrytitle','#articleTitle','.articleTitle','post post-title','h1.title','h2.article','h1','html head title','title'];var GenericTitleExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,metaCache=_ref.metaCache;// First, check to see if we have a matching meta tag that we can make
+// use of that is strongly associated with the headline.
+var title=void 0;title=extractFromMeta$$1($,STRONG_TITLE_META_TAGS,metaCache);if(title)return cleanTitle$$1(title,{url:url,$:$});// Second, look through our content selectors for the most likely
+// article title that is strongly associated with the headline.
+title=extractFromSelectors$$1($,STRONG_TITLE_SELECTORS);if(title)return cleanTitle$$1(title,{url:url,$:$});// Third, check for weaker meta tags that may match.
+title=extractFromMeta$$1($,WEAK_TITLE_META_TAGS,metaCache);if(title)return cleanTitle$$1(title,{url:url,$:$});// Last, look for weaker selector tags that may match.
+title=extractFromSelectors$$1($,WEAK_TITLE_SELECTORS);if(title)return cleanTitle$$1(title,{url:url,$:$});// If no matches, return an empty string
+return'';}};// An ordered list of meta tag names that denote likely article authors. All
+// attributes should be lowercase for faster case-insensitive matching. From
+// most distinct to least distinct.
//
-// top_score = 0
-// top_url = None
-// for (url, score) in img_scores.items():
-// if score > top_score:
-// top_url = url
-// top_score = score
+// Note: "author" is too often the -developer- of the page, so it is not
+// added here.
+var AUTHOR_META_TAGS=['byl','clmst','dc.author','dcsext.author','dc.creator','rbauthors','authors'];var AUTHOR_MAX_LENGTH=300;// An ordered list of XPath Selectors to find likely article authors. From
+// most explicit to least explicit.
//
-// if top_score > 0:
-// logger.debug('Using top score image from content. Score was %d', top_score)
-// return top_url
+// Note - this does not use classes like CSS. This checks to see if the string
+// exists in the className, which is not as accurate as .className (which
+// splits on spaces/endlines), but for our purposes it's close enough. The
+// speed tradeoff is worth the accuracy hit.
+var AUTHOR_SELECTORS=['.entry .entry-author','.author.vcard .fn','.author .vcard .fn','.byline.vcard .fn','.byline .vcard .fn','.byline .by .author','.byline .by','.byline .author','.post-author.vcard','.post-author .vcard','a[rel=author]','#by_author','.by_author','#entryAuthor','.entryAuthor','.byline a[href*=author]','#author .authorname','.author .authorname','#author','.author','.articleauthor','.ArticleAuthor','.byline'];// An ordered list of Selectors to find likely article authors, with
+// regular expression for content.
+var bylineRe=/^[\n\s]*By/i;var BYLINE_SELECTORS_RE=[['#byline',bylineRe],['.byline',bylineRe]];var GenericAuthorExtractor={extract:function extract(_ref){var $=_ref.$,metaCache=_ref.metaCache;var author=void 0;// First, check to see if we have a matching
+// meta tag that we can make use of.
+author=extractFromMeta$$1($,AUTHOR_META_TAGS,metaCache);if(author&&author.length.
-// logger.debug('Trying to find lead image in probable nodes')
-// for selector in constants.LEAD_IMAGE_URL_SELECTORS:
-// nodes = self.resource.extract_by_selector(selector)
-// for node in nodes:
-// clean_value = None
-// if node.attrib.get('src'):
-// clean_value = self.clean(node.attrib['src'])
+// # 80% or greater similarity means the dek was very similar to some
+// # of the starting content, so we skip it.
+// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:
+// return dek
//
-// if not clean_value and node.attrib.get('href'):
-// clean_value = self.clean(node.attrib['href'])
+// return None
+// An ordered list of meta tag names that denote likely article leading images.
+// All attributes should be lowercase for faster case-insensitive matching.
+// From most distinct to least distinct.
+var LEAD_IMAGE_URL_META_TAGS=['og:image','twitter:image','image_src'];var LEAD_IMAGE_URL_SELECTORS=['link[rel=image_src]'];var POSITIVE_LEAD_IMAGE_URL_HINTS=['upload','wp-content','large','photo','wp-image'];var POSITIVE_LEAD_IMAGE_URL_HINTS_RE=new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'),'i');var NEGATIVE_LEAD_IMAGE_URL_HINTS=['spacer','sprite','blank','throbber','gradient','tile','bg','background','icon','social','header','hdr','advert','spinner','loader','loading','default','rating','share','facebook','twitter','theme','promo','ads','wp-includes'];var NEGATIVE_LEAD_IMAGE_URL_HINTS_RE=new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'),'i');var GIF_RE=/\.gif(\?.*)?$/i;var JPG_RE=/\.jpe?g(\?.*)?$/i;function getSig($node){return($node.attr('class')||'')+' '+($node.attr('id')||'');}// Scores an image URL by its text alone: +20 for a positive hint, -20 for a negative hint, -10 for a .gif and +10 for a .jpg/.jpeg extension.
+function scoreImageUrl(url){url=url.trim();var score=0;if(POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)){score+=20;}if(NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)){score-=20;}// TODO: Consider removing the GIF penalty below, since
+// gifs are much more common/popular than they once were.
+if(GIF_RE.test(url)){score-=10;}if(JPG_RE.test(url)){score+=10;}// Every other extension (e.g. PNG) leaves the score unchanged.
+return score;}// Returns 5 when the image has a non-empty alt attribute (usually a sign of a non-presentational image), otherwise 0.
+function scoreAttr($img){if($img.attr('alt')){return 5;}return 0;}// Scores an image by its ancestors: +25 if any ancestor is a figure element, and +15 each time the
+// parent's or grandparent's class/id signature matches PHOTO_HINTS_RE$1$1. NOTE(review): $gParent stays undefined when $parent.length !== 1, and getSig(undefined) would then throw - confirm.
+function scoreByParents($img){var score=0;var $figParent=$img.parents('figure').first();if($figParent.length===1){score+=25;}var $parent=$img.parent();var $gParent=void 0;if($parent.length===1){$gParent=$parent.parent();}[$parent,$gParent].forEach(function($node){if(PHOTO_HINTS_RE$1$1.test(getSig($node))){score+=15;}});return score;}// Scores an image by its next sibling: +25 if that sibling is a figcaption,
+// and +15 if the sibling's class/id signature matches PHOTO_HINTS_RE$1$1.
+function scoreBySibling($img){var score=0;var $sibling=$img.next();var sibling=$sibling.get(0);if(sibling&&sibling.tagName==='figcaption'){score+=25;}if(PHOTO_HINTS_RE$1$1.test(getSig($sibling))){score+=15;}return score;}/* Scores an image from its width/height attributes (parsed with parseFloat); a missing or non-numeric value skips the corresponding check. */function scoreByDimensions($img){var score=0;var width=parseFloat($img.attr('width'));var height=parseFloat($img.attr('height'));var src=$img.attr('src');// Penalty for skinny images (width of 50 or less)
+if(width&&width<=50){score-=50;}// Penalty for short images (height of 50 or less)
+if(height&&height<=50){score-=50;}if(width&&height&&!src.includes('sprite')){var area=width*height;if(area<5000){// Smaller than 50 x 100
+score-=100;}else{score+=Math.round(area/1000);}}return score;}/* Position bonus: images in the first half of the list score positive, later ones negative. */function scoreByPosition($imgs,index){return $imgs.length/2-index;}// Given a resource, try to find the lead image URL from within
+// it. Like content and next page extraction, uses a scoring system
+// to determine what the most likely image may be. Short circuits
+// on really probable things like og:image meta tags.
//
-// if not clean_value and node.attrib.get('value'):
-// clean_value = self.clean(node.attrib['value'])
+// Potential signals to still take advantage of:
+// * domain
+// * weird aspect ratio
+var GenericLeadImageUrlExtractor={extract:function extract(_ref){var $=_ref.$,content=_ref.content,metaCache=_ref.metaCache,html=_ref.html;var cleanUrl=void 0;if(!$.browser&&$('head').length===0){$('*').first().prepend(html);}// Check to see if we have a matching meta tag that we can make use of.
+// This check comes first because it is now common practice to supply
+// large, representative images through Open Graph or Twitter card
+// meta tags.
+var imageUrl=extractFromMeta$$1($,LEAD_IMAGE_URL_META_TAGS,metaCache,false);if(imageUrl){cleanUrl=clean$1(imageUrl);if(cleanUrl)return cleanUrl;}// Next, try to find the "best" image via the content.
+// We'd rather not have to fetch each image and check dimensions,
+// so try to do some analysis and determine them instead.
+var $content=$(content);var imgs=$('img',$content).toArray();var imgScores={};imgs.forEach(function(img,index){var $img=$(img);var src=$img.attr('src');if(!src)return;var score=scoreImageUrl(src);score+=scoreAttr($img);score+=scoreByParents($img);score+=scoreBySibling($img);score+=scoreByDimensions($img);score+=scoreByPosition(imgs,index);imgScores[src]=score;});var _Reflect$ownKeys$redu=_Reflect$ownKeys$1(imgScores).reduce(function(acc,key){return imgScores[key]>acc[1]?[key,imgScores[key]]:acc;},[null,0]),_Reflect$ownKeys$redu2=_slicedToArray$1(_Reflect$ownKeys$redu,2),topUrl=_Reflect$ownKeys$redu2[0],topScore=_Reflect$ownKeys$redu2[1];if(topScore>0){cleanUrl=clean$1(topUrl);if(cleanUrl)return cleanUrl;}// If nothing else worked, check to see if there are any really
+// probable nodes in the doc, like link[rel=image_src].
+var _iteratorNormalCompletion=true;var _didIteratorError=false;var _iteratorError=undefined;try{for(var _iterator=_getIterator$1(LEAD_IMAGE_URL_SELECTORS),_step;!(_iteratorNormalCompletion=(_step=_iterator.next()).done);_iteratorNormalCompletion=true){var selector=_step.value;var $node=$(selector).first();var src=$node.attr('src');if(src){cleanUrl=clean$1(src);if(cleanUrl)return cleanUrl;}var href=$node.attr('href');if(href){cleanUrl=clean$1(href);if(cleanUrl)return cleanUrl;}var value=$node.attr('value');if(value){cleanUrl=clean$1(value);if(cleanUrl)return cleanUrl;}}}catch(err){_didIteratorError=true;_iteratorError=err;}finally{try{if(!_iteratorNormalCompletion&&_iterator.return){_iterator.return();}}finally{if(_didIteratorError){throw _iteratorError;}}}return null;}};// def extract(self):
+// """
+// # First, try to find the "best" image via the content.
+// # We'd rather not have to fetch each image and check dimensions,
+// # so try to do some analysis and determine them instead.
+// content = self.extractor.extract_content(return_type="node")
+// imgs = content.xpath('.//img')
+// img_scores = defaultdict(int)
+// logger.debug('Scoring %d images from content', len(imgs))
+// for (i, img) in enumerate(imgs):
+// img_score = 0
//
-// if clean_value:
-// logger.debug('Found lead image in probable nodes.')
-// logger.debug('Node was: %s', node)
-// return clean_value
+// if not 'src' in img.attrib:
+// logger.debug('No src attribute found')
+// continue
//
-// return None
-function scoreSimilarity(score,articleUrl,href){// Do this last and only if we have a real candidate, because it's
-// potentially expensive computationally. Compare the link to this
-// URL using difflib to get the % similarity of these URLs. On a
-// sliding scale, subtract points from this link based on
-// similarity.
-if(score>0){var similarity=new difflib$1.SequenceMatcher(null,articleUrl,href).ratio();// Subtract .1 from diff_percent when calculating modifier,
-// which means that if it's less than 10% different, we give a
-// bonus instead. Ex:
-// 3% different = +17.5 points
-// 10% different = 0 points
-// 20% different = -25 points
-var diffPercent=1.0-similarity;var diffModifier=-(250*(diffPercent-0.2));return score+diffModifier;}return 0;}function scoreLinkText(linkText,pageNum){// If the link text can be parsed as a number, give it a minor
-// bonus, with a slight bias towards lower numbered pages. This is
-// so that pages that might not have 'next' in their text can still
-// get scored, and sorted properly by score.
-var score=0;if(IS_DIGIT_RE.test(linkText.trim())){var linkTextAsNum=parseInt(linkText,10);// If it's the first page, we already got it on the first call.
-// Give it a negative score. Otherwise, up to page 10, give a
-// small bonus.
-if(linkTextAsNum<2){score=-30;}else{score=Math.max(0,10-linkTextAsNum);}// If it appears that the current page number is greater than
-// this links page number, it's a very bad sign. Give it a big
-// penalty.
-if(pageNum&&pageNum>=linkTextAsNum){score-=50;}}return score;}function scorePageInLink(pageNum,isWp){// page in the link = bonus. Intentionally ignore wordpress because
-// their ?p=123 link style gets caught by this even though it means
-// separate documents entirely.
-if(pageNum&&!isWp){return 50;}return 0;}var DIGIT_RE$2=/\d/;// A list of words that, if found in link text or URLs, likely mean that
-// this link is not a next page link.
-var EXTRANEOUS_LINK_HINTS$1=['print','archive','comment','discuss','e-mail','email','share','reply','all','login','sign','single','adx','entry-unrelated'];var EXTRANEOUS_LINK_HINTS_RE$1=new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'),'i');// Match any link text/classname/id that looks like it could mean the next
-// page. Things like: next, continue, >, >>, » but not >|, »| as those can
-// mean last page.
-var NEXT_LINK_TEXT_RE$1=new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))','i');// Match any link text/classname/id that looks like it is an end link: things
-// like "first", "last", "end", etc.
-var CAP_LINK_TEXT_RE$1=new RegExp('(first|last|end)','i');// Match any link text/classname/id that looks like it means the previous
-// page.
-var PREV_LINK_TEXT_RE$1=new RegExp('(prev|earl|old|new|<|«)','i');// Match any phrase that looks like it could be page, or paging, or pagination
-function scoreExtraneousLinks(href){// If the URL itself contains extraneous values, give a penalty.
-if(EXTRANEOUS_LINK_HINTS_RE$1.test(href)){return-25;}return 0;}function makeSig$1($link){return($link.attr('class')||'')+' '+($link.attr('id')||'');}function scoreByParents$1($link){// If a parent node contains paging-like classname or id, give a
-// bonus. Additionally, if a parent_node contains bad content
-// (like 'sponsor'), give a penalty.
-var $parent=$link.parent();var positiveMatch=false;var negativeMatch=false;var score=0;_Array$from(range(0,4)).forEach(function(){if($parent.length===0){return;}var parentData=makeSig$1($parent,' ');// If we have 'page' or 'paging' in our data, that's a good
-// sign. Add a bonus.
-if(!positiveMatch&&PAGE_RE.test(parentData)){positiveMatch=true;score+=25;}// If we have 'comment' or something in our data, and
-// we don't have something like 'content' as well, that's
-// a bad sign. Give a penalty.
-if(!negativeMatch&&NEGATIVE_SCORE_RE.test(parentData)&&EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)){if(!POSITIVE_SCORE_RE.test(parentData)){negativeMatch=true;score-=25;}}$parent=$parent.parent();});return score;}function scorePrevLink(linkData){// If the link has something like "previous", its definitely
-// an old link, skip it.
-if(PREV_LINK_TEXT_RE$1.test(linkData)){return-200;}return 0;}function shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls){// skip if we've already fetched this url
-if(previousUrls.find(function(url){return href===url;})!==undefined){return false;}// If we've already parsed this URL, or the URL matches the base
-// URL, or is empty, skip it.
-if(!href||href===articleUrl||href===baseUrl){return false;}var hostname=parsedUrl.hostname;var _URL$parse=URL$1.parse(href),linkHost=_URL$parse.hostname;// Domain mismatch.
-if(linkHost!==hostname){return false;}// If href doesn't contain a digit after removing the base URL,
-// it's certainly not the next page.
-var fragment=href.replace(baseUrl,'');if(!DIGIT_RE$2.test(fragment)){return false;}// This link has extraneous content (like "comment") in its link
-// text, so we skip it.
-if(EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)){return false;}// Next page link text is never long, skip if it is too long.
-if(linkText.length>25){return false;}return true;}function scoreBaseUrl(href,baseRegex){// If the baseUrl isn't part of this URL, penalize this
-// link. It could still be the link, but the odds are lower.
-// Example:
-// http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
-if(!baseRegex.test(href)){return-25;}return 0;}function scoreNextLinkText(linkData){// Things like "next", ">>", etc.
-if(NEXT_LINK_TEXT_RE$1.test(linkData)){return 50;}return 0;}function scoreCapLinks(linkData){// Cap links are links like "last", etc.
-if(CAP_LINK_TEXT_RE$1.test(linkData)){// If we found a link like "last", but we've already seen that
-// this link is also "next", it's fine. If it's not been
-// previously marked as "next", then it's probably bad.
-// Penalize.
-if(NEXT_LINK_TEXT_RE$1.test(linkData)){return-65;}}return 0;}function makeBaseRegex(baseUrl){return new RegExp('^'+baseUrl,'i');}function makeSig($link,linkText){return(linkText||$link.text())+' '+($link.attr('class')||'')+' '+($link.attr('id')||'');}function scoreLinks(_ref){var links=_ref.links,articleUrl=_ref.articleUrl,baseUrl=_ref.baseUrl,parsedUrl=_ref.parsedUrl,$=_ref.$,_ref$previousUrls=_ref.previousUrls,previousUrls=_ref$previousUrls===undefined?[]:_ref$previousUrls;parsedUrl=parsedUrl||URL$1.parse(articleUrl);var baseRegex=makeBaseRegex(baseUrl);var isWp=isWordpress($);// Loop through all links, looking for hints that they may be next-page
-// links. Things like having "page" in their textContent, className or
-// id, or being a child of a node with a page-y className or id.
+// try:
+// parsed_img = urlparse(img.attrib['src'])
+// img_path = parsed_img.path.lower()
+// except ValueError:
+// logger.debug('ValueError getting img path.')
+// continue
+// logger.debug('Image path is %s', img_path)
//
-// After we do that, assign each page a score, and pick the one that
-// looks most like the next page link, as long as its score is strong
-// enough to have decent confidence.
-var scoredPages=links.reduce(function(possiblePages,link){// Remove any anchor data since we don't do a good job
-// standardizing URLs (it's hard), we're going to do
-// some checking with and without a trailing slash
-var href=removeAnchor(link.attribs.href);var $link=$(link);var linkText=$link.text();if(!shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls)){return possiblePages;}// ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
-if(!possiblePages[href]){possiblePages[href]={score:0,linkText:linkText,href:href};}else{possiblePages[href].linkText=possiblePages[href].linkText+'|'+linkText;}var possiblePage=possiblePages[href];var linkData=makeSig($link,linkText);var pageNum=pageNumFromUrl(href);var score=scoreBaseUrl(href,baseRegex);score+=scoreNextLinkText(linkData);score+=scoreCapLinks(linkData);score+=scorePrevLink(linkData);score+=scoreByParents$1($link);score+=scoreExtraneousLinks(href);score+=scorePageInLink(pageNum,isWp);score+=scoreLinkText(linkText,pageNum);score+=scoreSimilarity(score,articleUrl,href);possiblePage.score=score;return possiblePages;},{});return _Reflect$ownKeys$1(scoredPages).length===0?null:scoredPages;}// Looks for and returns next page url
-// for multi-page articles
-var GenericNextPageUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,parsedUrl=_ref.parsedUrl,_ref$previousUrls=_ref.previousUrls,previousUrls=_ref$previousUrls===undefined?[]:_ref$previousUrls;parsedUrl=parsedUrl||URL$1.parse(url);var articleUrl=removeAnchor(url);var baseUrl=articleBaseUrl(url,parsedUrl);var links=$('a[href]').toArray();var scoredLinks=scoreLinks({links:links,articleUrl:articleUrl,baseUrl:baseUrl,parsedUrl:parsedUrl,$:$,previousUrls:previousUrls});// If no links were scored, return null
-if(!scoredLinks)return null;// now that we've scored all possible pages,
-// find the biggest one.
-var topPage=_Reflect$ownKeys$1(scoredLinks).reduce(function(acc,link){var scoredLink=scoredLinks[link];return scoredLink.score>acc.score?scoredLink:acc;},{score:-100});// If the score is less than 50, we're not confident enough to use it,
-// so we fail.
-if(topPage.score>=50){return topPage.href;}return null;}};var CANONICAL_META_SELECTORS=['og:url'];function parseDomain(url){var parsedUrl=URL$1.parse(url);var hostname=parsedUrl.hostname;return hostname;}function result(url){return{url:url,domain:parseDomain(url)};}var GenericUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,metaCache=_ref.metaCache;var $canonical=$('link[rel=canonical]');if($canonical.length!==0){var href=$canonical.attr('href');if(href){return result(href);}}var metaUrl=extractFromMeta$$1($,CANONICAL_META_SELECTORS,metaCache);if(metaUrl){return result(metaUrl);}return result(url);}};var EXCERPT_META_SELECTORS=['og:description','twitter:description'];function clean$2(content,$){var maxLength=arguments.length>2&&arguments[2]!==undefined?arguments[2]:200;content=content.replace(/[\s\n]+/g,' ').trim();return ellipsize$1(content,maxLength,{ellipse:'…'});}var GenericExcerptExtractor={extract:function extract(_ref){var $=_ref.$,content=_ref.content,metaCache=_ref.metaCache;var excerpt=extractFromMeta$$1($,EXCERPT_META_SELECTORS,metaCache);if(excerpt){return clean$2(stripTags(excerpt,$));}// Fall back to excerpting from the extracted content
-var maxLength=200;var shortContent=content.slice(0,maxLength*5);return clean$2($(shortContent).text(),$,maxLength);}};var GenericWordCountExtractor={extract:function extract(_ref){var content=_ref.content;var $=cheerio$1.load(content);var text=normalizeSpaces($('div').first().text());return text.split(/\s/).length;}};var GenericExtractor={// This extractor is the default for all domains
-domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:function direction(_ref){var title=_ref.title;return stringDirection$1.getDirection(title);},extract:function extract(options){var html=options.html;if(html){var $=cheerio$1.load(html);options.$=$;}var title=this.title(options);var date_published=this.date_published(options);var author=this.author(options);var content=this.content(_extends$1({},options,{title:title}));var lead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));var dek=this.dek(_extends$1({},options,{content:content}));var next_page_url=this.next_page_url(options);var excerpt=this.excerpt(_extends$1({},options,{content:content}));var word_count=this.word_count(_extends$1({},options,{content:content}));var direction=this.direction({title:title});var _url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};function getExtractor(url,parsedUrl){parsedUrl=parsedUrl||URL$1.parse(url);var _parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;var baseDomain=hostname.split('.').slice(-2).join('.');return Extractors[hostname]||Extractors[baseDomain]||GenericExtractor;}// Remove elements by an array of selectors
-function cleanBySelectors($content,$,_ref){var clean=_ref.clean;if(!clean)return $content;$(clean.join(','),$content).remove();return $content;}// Transform matching elements
-function transformElements($content,$,_ref2){var transforms=_ref2.transforms;if(!transforms)return $content;_Reflect$ownKeys$1(transforms).forEach(function(key){var $matches=$(key,$content);var value=transforms[key];// If value is a string, convert directly
-if(typeof value==='string'){$matches.each(function(index,node){convertNodeTo($(node),$,transforms[key]);});}else if(typeof value==='function'){// If value is function, apply function to node
-$matches.each(function(index,node){var result=value($(node),$);// If function returns a string, convert node to that value
-if(typeof result==='string'){convertNodeTo($(node),$,result);}});}});return $content;}function findMatchingSelector($,selectors){return selectors.find(function(selector){if(Array.isArray(selector)){var _selector=_slicedToArray$1(selector,2),s=_selector[0],attr=_selector[1];return $(s).length===1&&$(s).attr(attr)&&$(s).attr(attr).trim()!=='';}return $(selector).length===1&&$(selector).text().trim()!=='';});}function select(opts){var $=opts.$,type=opts.type,extractionOpts=opts.extractionOpts,_opts$extractHtml=opts.extractHtml,extractHtml=_opts$extractHtml===undefined?false:_opts$extractHtml;// Skip if there's not extraction for this type
-if(!extractionOpts)return null;// If a string is hardcoded for a type (e.g., Wikipedia
-// contributors), return the string
-if(typeof extractionOpts==='string')return extractionOpts;var selectors=extractionOpts.selectors,_extractionOpts$defau=extractionOpts.defaultCleaner,defaultCleaner=_extractionOpts$defau===undefined?true:_extractionOpts$defau;var matchingSelector=findMatchingSelector($,selectors);if(!matchingSelector)return null;// Declaring result; will contain either
-// text or html, which will be cleaned
-// by the appropriate cleaner type
-// If the selector type requests html as its return type
-// transform and clean the element with provided selectors
-if(extractHtml){var $content=$(matchingSelector);// Wrap in div so transformation can take place on root element
-$content.wrap($(''));$content=$content.parent();$content=transformElements($content,$,extractionOpts);$content=cleanBySelectors($content,$,extractionOpts);$content=Cleaners[type]($content,_extends$1({},opts,{defaultCleaner:defaultCleaner}));return $.html($content);}var result=void 0;// if selector is an array (e.g., ['img', 'src']),
-// extract the attr
-if(Array.isArray(matchingSelector)){var _matchingSelector=_slicedToArray$1(matchingSelector,2),selector=_matchingSelector[0],attr=_matchingSelector[1];result=$(selector).attr(attr).trim();}else{result=$(matchingSelector).text().trim();}// Allow custom extractor to skip default cleaner
-// for this type; defaults to true
-if(defaultCleaner){return Cleaners[type](result,opts);}return result;}function extractResult(opts){var type=opts.type,extractor=opts.extractor,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;var result=select(_extends$1({},opts,{extractionOpts:extractor[type]}));// If custom parser succeeds, return the result
-if(result){return result;}// If nothing matches the selector, and fallback is enabled,
-// run the Generic extraction
-if(fallback)return GenericExtractor[type](opts);return null;}var RootExtractor={extract:function extract(){var extractor=arguments.length>0&&arguments[0]!==undefined?arguments[0]:GenericExtractor;var opts=arguments[1];var _opts=opts,contentOnly=_opts.contentOnly,extractedTitle=_opts.extractedTitle;// This is the generic extractor. Run its extract method
-if(extractor.domain==='*')return extractor.extract(opts);opts=_extends$1({},opts,{extractor:extractor});if(contentOnly){var _content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:extractedTitle}));return{content:_content};}var title=extractResult(_extends$1({},opts,{type:'title'}));var date_published=extractResult(_extends$1({},opts,{type:'date_published'}));var author=extractResult(_extends$1({},opts,{type:'author'}));var next_page_url=extractResult(_extends$1({},opts,{type:'next_page_url'}));var content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:title}));var lead_image_url=extractResult(_extends$1({},opts,{type:'lead_image_url',content:content}));var excerpt=extractResult(_extends$1({},opts,{type:'excerpt',content:content}));var dek=extractResult(_extends$1({},opts,{type:'dek',content:content,excerpt:excerpt}));var word_count=extractResult(_extends$1({},opts,{type:'word_count',content:content}));var direction=extractResult(_extends$1({},opts,{type:'direction',title:title}));var _ref3=extractResult(_extends$1({},opts,{type:'url_and_domain'}))||{url:null,domain:null},url=_ref3.url,domain=_ref3.domain;return{title:title,content:content,author:author,date_published:date_published,lead_image_url:lead_image_url,dek:dek,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};var collectAllPages=function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2){var next_page_url=_ref2.next_page_url,html=_ref2.html,$=_ref2.$,metaCache=_ref2.metaCache,result=_ref2.result,Extractor=_ref2.Extractor,title=_ref2.title,url=_ref2.url;var pages,previousUrls,extractorOpts,nextPageResult,word_count;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:// At this point, we've fetched just the first page
-pages=1;previousUrls=[removeAnchor(url)];// If we've gone over 26 pages, something has
-// likely gone wrong.
-case 2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;return Resource.create(next_page_url);case 6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:'\n '+result.content+'\n \n
'});return _context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case 17:case'end':return _context.stop();}}},_callee,this);}));function collectAllPages(_x){return _ref.apply(this,arguments);}return collectAllPages;}();var Mercury={parse:function parse(url,html){var _this=this;var opts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,Extractor,$,metaCache,result,_result,title,next_page_url;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=4;break;}return _context.abrupt('return',Errors.badUrl);case 4:Extractor=getExtractor(url,parsedUrl);// console.log(`Using extractor for ${Extractor.domain}`);
-_context.next=7;return Resource.create(url,html,parsedUrl);case 7:$=_context.sent;if(!$.error){_context.next=10;break;}return _context.abrupt('return',$);case 10:html=$.html();// Cached value of every meta name in our document.
-// Used when extracting title/author/date_published/dek
-metaCache=$('meta').map(function(_,node){return $(node).attr('name');}).toArray();result=RootExtractor.extract(Extractor,{url:url,html:html,$:$,metaCache:metaCache,parsedUrl:parsedUrl,fallback:fallback});_result=result,title=_result.title,next_page_url=_result.next_page_url;// Fetch more pages if next_page_url found
-if(!(fetchAllPages&&next_page_url)){_context.next=20;break;}_context.next=17;return collectAllPages({Extractor:Extractor,next_page_url:next_page_url,html:html,$:$,metaCache:metaCache,result:result,title:title,url:url});case 17:result=_context.sent;_context.next=21;break;case 20:result=_extends$1({},result,{total_pages:1,rendered_pages:1});case 21:return _context.abrupt('return',result);case 22:case'end':return _context.stop();}}},_callee,_this);}))();},// A convenience method for getting a resource
-// to work with, e.g., for custom extractor generator
-fetchResource:function fetchResource(url){var _this2=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee2(){return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1){switch(_context2.prev=_context2.next){case 0:_context2.next=2;return Resource.create(url);case 2:return _context2.abrupt('return',_context2.sent);case 3:case'end':return _context2.stop();}}},_callee2,_this2);}))();}};var mercury=Mercury;
-
-// Spacer images to be removed
-var SPACER_RE$1 = new RegExp('trans|transparent|spacer|blank', 'i');
-
-// The class we will use to mark elements we want to keep
-// but would normally remove
-var KEEP_CLASS$1 = 'mercury-parser-keep';
-
-var KEEP_SELECTORS$1 = ['iframe[src^="https://www.youtube.com"]', 'iframe[src^="http://www.youtube.com"]', 'iframe[src^="https://player.vimeo"]', 'iframe[src^="http://player.vimeo"]'];
-
-// A list of tags to strip from the output if we encounter them.
-var STRIP_OUTPUT_TAGS$1 = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
-
-// cleanAttributes
-var REMOVE_ATTRS$1 = ['style', 'align'];
-var REMOVE_ATTR_SELECTORS$1 = REMOVE_ATTRS$1.map(function (selector) {
- return '[' + selector + ']';
-});
-var REMOVE_ATTR_LIST$1 = REMOVE_ATTRS$1.join(',');
-var WHITELIST_ATTRS$1 = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
-var WHITELIST_ATTRS_RE$1 = new RegExp('^(' + WHITELIST_ATTRS$1.join('|') + ')$', 'i');
-
-// removeEmpty
-var REMOVE_EMPTY_TAGS$1 = ['p'];
-var REMOVE_EMPTY_SELECTORS$1 = REMOVE_EMPTY_TAGS$1.map(function (tag) {
- return tag + ':empty';
-}).join(',');
-
-// cleanTags
-var CLEAN_CONDITIONALLY_TAGS$1 = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
-
-// cleanHeaders
-var HEADER_TAGS$1 = ['h2', 'h3', 'h4', 'h5', 'h6'];
-var HEADER_TAG_LIST$1 = HEADER_TAGS$1.join(',');
-
-// // CONTENT FETCHING CONSTANTS ////
-
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST$1$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
-// 'form', // This is too generic, has too many false positives
-'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
-
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
+// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
+// logger.debug('Positive URL hints match. Adding 20.')
+// img_score += 20
//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST$1$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
-
-// A list of tags which, if found inside, should cause a <div /> to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into <p /> tags.
-var DIV_TO_P_BLOCK_TAGS$1$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
-
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
-
-
-
-
-// A list of selectors that specify, very clearly, either hNews or other
-// very content-specific style content, like Blogger templates.
-// More examples here: http://microformats.org/wiki/blog-post-formats
-
-
-
-
-
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
+// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
+// logger.debug('Negative URL hints match. Subtracting 20.')
+// img_score -= 20
//
-// TODO: Perhaps have these scale based on their odds of being quality?
-
-
-// The above list, joined into a matching regular expression
-
-
-// Readability publisher-specific guidelines
-
-
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
+// # Gifs are more often structure than photos
+// if img_path.endswith('gif'):
+// logger.debug('gif found. Subtracting 10.')
+// img_score -= 10
//
-// TODO: Perhaps have these scale based on their odds of being quality?
-
-// The above list, joined into a matching regular expression
-
-
-// XPath to try to determine if a page is wordpress. Not always successful.
-var IS_WP_SELECTOR$1 = 'meta[name=generator][value^=WordPress]';
-
-// Match a digit. Pretty clear.
-
-
-// A list of words that, if found in link text or URLs, likely mean that
-// this link is not a next page link.
-
-
-
-// Match any phrase that looks like it could be page, or paging, or pagination
-
-
-// Match any link text/classname/id that looks like it could mean the next
-// page. Things like: next, continue, >, >>, » but not >|, »| as those can
-// mean last page.
-// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
-
-
-// Match any link text/classname/id that looks like it is an end link: things
-// like "first", "last", "end", etc.
-
-
-// Match any link text/classname/id that looks like it means the previous
-// page.
-
-
-// Match 2 or more consecutive <br> tags
-
-
-// Match 1 BR tag.
-
-
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
-var BLOCK_LEVEL_TAGS$1 = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
-var BLOCK_LEVEL_TAGS_RE$1 = new RegExp('^(' + BLOCK_LEVEL_TAGS$1.join('|') + ')$', 'i');
-
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist$1$1 = UNLIKELY_CANDIDATES_BLACKLIST$1$1.join('|');
-var CANDIDATES_BLACKLIST$1 = new RegExp(candidatesBlacklist$1$1, 'i');
-
-var candidatesWhitelist$1$1 = UNLIKELY_CANDIDATES_WHITELIST$1$1.join('|');
-var CANDIDATES_WHITELIST$1 = new RegExp(candidatesWhitelist$1$1, 'i');
-
-// ## NOTES:
-// Another good candidate for refactoring/optimizing.
-// Very imperative code, I don't love it. - AP
-
-// Given cheerio object, convert consecutive <br /> tags into
-// <p /> tags instead.
+// # JPGs are more often photographs
+// if img_path.endswith('jpg'):
+// logger.debug('jpg found. Adding 10.')
+// img_score += 10
//
-// :param $: A cheerio object
-
-function brsToPs$$1($) {
- var collapsing = false;
- $('br').each(function (index, element) {
- var nextElement = $(element).next().get(0);
-
- if (nextElement && nextElement.tagName === 'br') {
- collapsing = true;
- $(element).remove();
- } else if (collapsing) {
- collapsing = false;
- // $(element).replaceWith('<p />')
- paragraphize$1(element, $, true);
- }
- });
-
- return $;
-}
-
-// Given a node, turn it into a P if it is not already a P, and
-// make sure it conforms to the constraints of a P tag (I.E. does
-// not contain any other block tags.)
+// # PNGs are neutral.
//
-// If the node is a <br />, it treats the following inline siblings
-// as if they were its children.
+// # Alt attribute usually means non-presentational image.
+// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:
+// logger.debug('alt attribute found. Adding 5.')
+// img_score += 5
//
-// :param node: The node to paragraphize; this is a raw node
-// :param $: The cheerio object to handle dom manipulation
-// :param br: Whether or not the passed node is a br
-
-function paragraphize$1(node, $) {
- var br = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
-
- var $node = $(node);
-
- if (br) {
- var sibling = node.nextSibling;
- var p = $('');
-
- // while the next node is text or not a block level element
- // append it to a new p node
- while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE$1.test(sibling.tagName))) {
- var nextSibling = sibling.nextSibling;
- $(sibling).appendTo(p);
- sibling = nextSibling;
- }
-
- $node.replaceWith(p);
- $node.remove();
- return $;
- }
-
- return $;
-}
-
-function convertDivs$1($) {
- $('div').each(function (index, div) {
- var $div = $(div);
- var convertable = $div.children(DIV_TO_P_BLOCK_TAGS$1$1).length === 0;
-
- if (convertable) {
- convertNodeTo$1($div, $, 'p');
- }
- });
-
- return $;
-}
-
-function convertSpans$1$1($) {
- $('span').each(function (index, span) {
- var $span = $(span);
- var convertable = $span.parents('p, div').length === 0;
- if (convertable) {
- convertNodeTo$1($span, $, 'p');
- }
- });
-
- return $;
-}
-
-function convertNodeTo$1($node, $) {
- var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
-
- var node = $node.get(0);
- if (!node) {
- return $;
- }
-
- var _$node$get = $node.get(0),
- attribs = _$node$get.attribs;
-
- var attribString = _Reflect$ownKeys(attribs).map(function (key) {
- return key + '=' + attribs[key];
- }).join(' ');
-
- $node.replaceWith('<' + tag + ' ' + attribString + '>' + $node.contents() + '' + tag + '>');
- return $;
-}
-
-function cleanForHeight$1($img, $) {
- var height = parseInt($img.attr('height'), 10);
- var width = parseInt($img.attr('width'), 10) || 20;
-
- // Remove images that explicitly have very small heights or
- // widths, because they are most likely shims or icons,
- // which aren't very useful for reading.
- if ((height || 20) < 10 || width < 10) {
- $img.remove();
- } else if (height) {
- // Don't ever specify a height on images, so that we can
- // scale with respect to width without screwing up the
- // aspect ratio.
- $img.removeAttr('height');
- }
-
- return $;
-}
-
-// Cleans out images where the source string matches transparent/spacer/etc
-// TODO This seems very aggressive - AP
-function removeSpacers$1($img, $) {
- if (SPACER_RE$1.test($img.attr('src'))) {
- $img.remove();
- }
-
- return $;
-}
-
-function stripJunkTags$1(article, $) {
- var tags = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];
-
- if (tags.length === 0) {
- tags = STRIP_OUTPUT_TAGS$1;
- }
-
- // Remove matching elements, but ignore
- // any element with a class of mercury-parser-keep
- $(tags.join(','), article).not('.' + KEEP_CLASS$1).remove();
-
- // Remove the mercury-parser-keep class from result
- $('.' + KEEP_CLASS$1, article).removeClass(KEEP_CLASS$1);
-
- return $;
-}
-
-function removeAllButWhitelist$1($article) {
- $article.find('*').each(function (index, node) {
- node.attribs = _Reflect$ownKeys(node.attribs).reduce(function (acc, attr) {
- if (WHITELIST_ATTRS_RE$1.test(attr)) {
- return _extends({}, acc, defineProperty({}, attr, node.attribs[attr]));
- }
-
- return acc;
- }, {});
- });
-
- return $article;
-}
-
-// // CONTENT FETCHING CONSTANTS ////
-
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST$2 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
-
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
+// # Look through our parent and grandparent for figure-like
+// # container elements, give a bonus if we find them
+// parents = [img.getparent()]
+// if parents[0] is not None and parents[0].getparent() is not None:
+// parents.append(parents[0].getparent())
+// for p in parents:
+// if p.tag == 'figure':
+// logger.debug('Parent with <figure> tag found. Adding 25.')
+// img_score += 25
//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST$2 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
-
-// A list of tags which, if found inside, should cause a <div /> to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into <p /> tags.
-var DIV_TO_P_BLOCK_TAGS$2 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
-
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
-var NON_TOP_CANDIDATE_TAGS$1$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
-
-var NON_TOP_CANDIDATE_TAGS_RE$1$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1$1.join('|') + ')$', 'i');
-
-// A list of selectors that specify, very clearly, either hNews or other
-// very content-specific style content, like Blogger templates.
-// More examples here: http://microformats.org/wiki/blog-post-formats
-var HNEWS_CONTENT_SELECTORS$1$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
-
-var PHOTO_HINTS$1$1 = ['figure', 'photo', 'image', 'caption'];
-var PHOTO_HINTS_RE$1$1 = new RegExp(PHOTO_HINTS$1$1.join('|'), 'i');
-
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
+// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])
+// if constants.PHOTO_HINTS_RE.search(p_sig):
+// logger.debug('Photo hints regex match. Adding 15.')
+// img_score += 15
//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS$2 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
-'\\Bcopy'];
-
-// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE$2 = new RegExp(POSITIVE_SCORE_HINTS$2.join('|'), 'i');
-
-// Readability publisher-specific guidelines
-var READABILITY_ASSET$1$1 = new RegExp('entry-content-asset', 'i');
-
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
+// # Look at our immediate sibling and see if it looks like it's a
+// # caption. Bonus if so.
+// sibling = img.getnext()
+// if sibling is not None:
+// if sibling.tag == 'figcaption':
+// img_score += 25
//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS$2 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
-'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
-'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
-'promo', 'pr_', // autoblog - press release
-'related', 'respond', 'roundcontent', // lifehacker restricted content warning
-'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
-// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE$2 = new RegExp(NEGATIVE_SCORE_HINTS$2.join('|'), 'i');
-
-// Match a digit. Pretty clear.
-
-
-// Match 2 or more consecutive tags
-
-
-// Match 1 BR tag.
-
-
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
-
-
-
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist$2 = UNLIKELY_CANDIDATES_BLACKLIST$2.join('|');
-
-
-var candidatesWhitelist$2 = UNLIKELY_CANDIDATES_WHITELIST$2.join('|');
-
-
-
-
-var PARAGRAPH_SCORE_TAGS$1$1 = new RegExp('^(p|li|span|pre)$', 'i');
-var CHILD_CONTENT_TAGS$1$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
-var BAD_TAGS$1$1 = new RegExp('^(address|form)$', 'i');
-
-// Get the score of a node based on its className and id.
-function getWeight$1(node) {
- var classes = node.attr('class');
- var id = node.attr('id');
- var score = 0;
-
- if (id) {
- // if id exists, try to score on both positive and negative
- if (POSITIVE_SCORE_RE$2.test(id)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$2.test(id)) {
- score -= 25;
- }
- }
-
- if (classes) {
- if (score === 0) {
- // if classes exist and id did not contribute to score
- // try to score on both positive and negative
- if (POSITIVE_SCORE_RE$2.test(classes)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$2.test(classes)) {
- score -= 25;
- }
- }
-
- // even if score has been set by id, add score for
- // possible photo matches
- // "try to keep photos if we can"
- if (PHOTO_HINTS_RE$1$1.test(classes)) {
- score += 10;
- }
-
- // add 25 if class matches entry-content-asset,
- // a class apparently instructed for use in the
- // Readability publisher guidelines
- // https://www.readability.com/developers/guidelines
- if (READABILITY_ASSET$1$1.test(classes)) {
- score += 25;
- }
- }
-
- return score;
-}
-
-// returns the score of a node based on
-// the node's score attribute
-// returns null if no score set
-function getScore$1($node) {
- return parseFloat($node.attr('score')) || null;
-}
-
-// return 1 for every comma in text
-function scoreCommas$1(text) {
- return (text.match(/,/g) || []).length;
-}
-
-var idkRe$1 = new RegExp('^(p|pre)$', 'i');
-
-function scoreLength$1(textLength) {
- var tagName = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'p';
-
- var chunks = textLength / 50;
-
- if (chunks > 0) {
- var lengthBonus = void 0;
-
- // No idea why p or pre are being tamped down here
- // but just following the source for now
- // Not even sure why tagName is included here,
- // since this is only being called from the context
- // of scoreParagraph
- if (idkRe$1.test(tagName)) {
- lengthBonus = chunks - 2;
- } else {
- lengthBonus = chunks - 1.25;
- }
-
- return Math.min(Math.max(lengthBonus, 0), 3);
- }
-
- return 0;
-}
-
-// Score a paragraph using various methods. Things like number of
-// commas, etc. Higher is better.
-function scoreParagraph$$1(node) {
- var score = 1;
- var text = node.text().trim();
- var textLength = text.length;
-
- // If this paragraph is less than 25 characters, don't count it.
- if (textLength < 25) {
- return 0;
- }
-
- // Add points for any commas within this paragraph
- score += scoreCommas$1(text);
-
- // For every 50 characters in this paragraph, add another point. Up
- // to 3 points.
- score += scoreLength$1(textLength);
-
- // Articles can end with short paragraphs when people are being clever
- // but they can also end with short paragraphs setting up lists of junk
- // that we strip. This negative tweaks junk setup paragraphs just below
- // the cutoff threshold.
- if (text.slice(-1) === ':') {
- score -= 1;
- }
-
- return score;
-}
-
-function setScore$1($node, $, score) {
- $node.attr('score', score);
- return $node;
-}
-
-function addScore$$1($node, $, amount) {
- try {
- var score = getOrInitScore$$1($node, $) + amount;
- setScore$1($node, $, score);
- } catch (e) {
- // Ignoring; error occurs in scoreNode
- }
-
- return $node;
-}
-
-// Adds 1/4 of a child's score to its parent
-function addToParent$$1(node, $, score) {
- var parent = node.parent();
- if (parent) {
- addScore$$1(parent, $, score * 0.25);
- }
-
- return node;
-}
-
-// gets and returns the score if it exists
-// if not, initializes a score based on
-// the node's tag type
-function getOrInitScore$$1($node, $) {
- var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;
-
- var score = getScore$1($node);
-
- if (score) {
- return score;
- }
-
- score = scoreNode$$1($node);
-
- if (weightNodes) {
- score += getWeight$1($node);
- }
-
- addToParent$$1($node, $, score);
-
- return score;
-}
-
-// Score an individual node. Has some smarts for paragraphs, otherwise
-// just scores based on tag.
-function scoreNode$$1($node) {
- var _$node$get = $node.get(0),
- tagName = _$node$get.tagName;
-
- // TODO: Consider ordering by most likely.
- // E.g., if divs are a more common tag on a page,
- // Could save doing that regex test on every node – AP
-
-
- if (PARAGRAPH_SCORE_TAGS$1$1.test(tagName)) {
- return scoreParagraph$$1($node);
- } else if (tagName === 'div') {
- return 5;
- } else if (CHILD_CONTENT_TAGS$1$1.test(tagName)) {
- return 3;
- } else if (BAD_TAGS$1$1.test(tagName)) {
- return -3;
- } else if (tagName === 'th') {
- return -5;
- }
-
- return 0;
-}
-
-function convertSpans$2($node, $) {
- if ($node.get(0)) {
- var _$node$get = $node.get(0),
- tagName = _$node$get.tagName;
-
- if (tagName === 'span') {
- // convert spans to divs
- convertNodeTo$1($node, $, 'div');
- }
- }
-}
-
-function addScoreTo$1($node, $, score) {
- if ($node) {
- convertSpans$2($node, $);
- addScore$$1($node, $, score);
- }
-}
-
-function scorePs$1($, weightNodes) {
- $('p, pre').not('[score]').each(function (index, node) {
- // The raw score for this paragraph, before we add any parent/child
- // scores.
- var $node = $(node);
- $node = setScore$1($node, $, getOrInitScore$$1($node, $, weightNodes));
-
- var $parent = $node.parent();
- var rawScore = scoreNode$$1($node);
-
- addScoreTo$1($parent, $, rawScore, weightNodes);
- if ($parent) {
- // Add half of the individual content score to the
- // grandparent
- addScoreTo$1($parent.parent(), $, rawScore / 2, weightNodes);
- }
- });
-
- return $;
-}
-
-var NORMALIZE_RE$1 = /\s{2,}/g;
-
-function normalizeSpaces$1(text) {
- return text.replace(NORMALIZE_RE$1, ' ').trim();
-}
-
-// Given a node type to search for, and a list of regular expressions,
-// look to see if this extraction can be found in the URL. Expects
-// that each expression in r_list will return group(1) as the proper
-// string to be cleaned.
-// Only used for date_published currently.
-
-// An expression that looks to try to find the page digit within a URL, if
-// it exists.
-// Matches:
-// page=1
-// pg=1
-// p=1
-// paging=12
-// pag=7
-// pagination/1
-// paging/88
-// pa/83
-// p/11
+// sib_sig = ' '.join([sibling.get('id', ''),
+// sibling.get('class', '')]).lower()
+// if 'caption' in sib_sig:
+// img_score += 15
//
-// Does not match:
-// pg=102
-// page:2
-var PAGE_IN_HREF_RE$1 = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
-
-var HAS_ALPHA_RE$1 = /[a-z]/i;
-
-var IS_ALPHA_RE$1 = /^[a-z]+$/i;
-var IS_DIGIT_RE$1 = /^[0-9]+$/i;
-
-function isGoodSegment$1(segment, index, firstSegmentHasLetters) {
- var goodSegment = true;
-
- // If this is purely a number, and it's the first or second
- // url_segment, it's probably a page number. Remove it.
- if (index < 2 && IS_DIGIT_RE$1.test(segment) && segment.length < 3) {
- goodSegment = true;
- }
-
- // If this is the first url_segment and it's just "index",
- // remove it
- if (index === 0 && segment.toLowerCase() === 'index') {
- goodSegment = false;
- }
-
- // If our first or second url_segment is smaller than 3 characters,
- // and the first url_segment had no alphas, remove it.
- if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
- goodSegment = false;
- }
-
- return goodSegment;
-}
-
-// Given a string, return True if it appears to have an ending sentence
-// within it, false otherwise.
-var SENTENCE_END_RE$1 = new RegExp('.( |$)');
-function hasSentenceEnd$1(text) {
- return SENTENCE_END_RE$1.test(text);
-}
-
-// Now that we have a top_candidate, look through the siblings of
-// it to see if any of them are decently scored. If they are, they
-// may be split parts of the content (Like two divs, a preamble and
-// a body.) Example:
-// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
-function mergeSiblings$1($candidate, topScore, $) {
- if (!$candidate.parent().length) {
- return $candidate;
- }
-
- var siblingScoreThreshold = Math.max(10, topScore * 0.25);
- var wrappingDiv = $('');
-
- $candidate.parent().children().each(function (index, sibling) {
- var $sibling = $(sibling);
- // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1$1.test(sibling.tagName)) {
- return null;
- }
-
- var siblingScore = getScore$1($sibling);
- if (siblingScore) {
- if ($sibling === $candidate) {
- wrappingDiv.append($sibling);
- } else {
- var contentBonus = 0;
- var density = linkDensity$1($sibling);
-
- // If sibling has a very low link density,
- // give it a small bonus
- if (density < 0.05) {
- contentBonus += 20;
- }
-
- // If sibling has a high link density,
- // give it a penalty
- if (density >= 0.5) {
- contentBonus -= 20;
- }
-
- // If sibling node has the same class as
- // candidate, give it a bonus
- if ($sibling.attr('class') === $candidate.attr('class')) {
- contentBonus += topScore * 0.2;
- }
-
- var newScore = siblingScore + contentBonus;
-
- if (newScore >= siblingScoreThreshold) {
- return wrappingDiv.append($sibling);
- } else if (sibling.tagName === 'p') {
- var siblingContent = $sibling.text();
- var siblingContentLength = textLength$1(siblingContent);
-
- if (siblingContentLength > 80 && density < 0.25) {
- return wrappingDiv.append($sibling);
- } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd$1(siblingContent)) {
- return wrappingDiv.append($sibling);
- }
- }
- }
- }
-
- return null;
- });
-
- return wrappingDiv;
-}
-
-// Scoring
-
-function removeUnlessContent$1($node, $, weight) {
- // Explicitly save entry-content-asset tags, which are
- // noted as valuable in the Publisher guidelines. For now
- // this works everywhere. We may want to consider making
- // this less of a sure-thing later.
- if ($node.hasClass('entry-content-asset')) {
- return;
- }
-
- var content = normalizeSpaces$1($node.text());
-
- if (scoreCommas$1(content) < 10) {
- var pCount = $('p', $node).length;
- var inputCount = $('input', $node).length;
-
- // Looks like a form, too many inputs.
- if (inputCount > pCount / 3) {
- $node.remove();
- return;
- }
-
- var contentLength = content.length;
- var imgCount = $('img', $node).length;
-
- // Content is too short, and there are no images, so
- // this is probably junk content.
- if (contentLength < 25 && imgCount === 0) {
- $node.remove();
- return;
- }
-
- var density = linkDensity$1($node);
-
- // Too high of link density, is probably a menu or
- // something similar.
- // console.log(weight, density, contentLength)
- if (weight < 25 && density > 0.2 && contentLength > 75) {
- $node.remove();
- return;
- }
-
- // Too high of a link density, despite the score being
- // high.
- if (weight >= 25 && density > 0.5) {
- // Don't remove the node if it's a list and the
- // previous sibling starts with a colon though. That
- // means it's probably content.
- var tagName = $node.get(0).tagName;
- var nodeIsList = tagName === 'ol' || tagName === 'ul';
- if (nodeIsList) {
- var previousNode = $node.prev();
- if (previousNode && normalizeSpaces$1(previousNode.text()).slice(-1) === ':') {
- return;
- }
- }
-
- $node.remove();
- return;
- }
-
- var scriptCount = $('script', $node).length;
-
- // Too many script tags, not enough content.
- if (scriptCount > 0 && contentLength < 150) {
- $node.remove();
- return;
- }
- }
-}
-
-function absolutize$1($, rootUrl, attr, $content) {
- $('[' + attr + ']', $content).each(function (_, node) {
- var url = node.attribs[attr];
- var absoluteUrl = URL.resolve(rootUrl, url);
-
- node.attribs[attr] = absoluteUrl;
- });
-}
-
-function makeLinksAbsolute$1($content, $, url) {
- ['href', 'src'].forEach(function (attr) {
- return absolutize$1($, url, attr, $content);
- });
-
- return $content;
-}
-
-function textLength$1(text) {
- return text.trim().replace(/\s+/g, ' ').length;
-}
-
-// Determines what percentage of the text
-// in a node is link text
-// Takes a node, returns a float
-function linkDensity$1($node) {
- var totalTextLength = textLength$1($node.text());
-
- var linkText = $node.find('a').text();
- var linkLength = textLength$1(linkText);
-
- if (totalTextLength > 0) {
- return linkLength / totalTextLength;
- } else if (totalTextLength === 0 && linkLength > 0) {
- return 1;
- }
-
- return 0;
-}
-
-function isGoodNode$1($node, maxChildren) {
- // If it has a number of children, it's more likely a container
- // element. Skip it.
- if ($node.children().length > maxChildren) {
- return false;
- }
- // If it looks to be within a comment, skip it.
- if (withinComment$1($node)) {
- return false;
- }
-
- return true;
-}
-
-// strips all tags from a string of text
-function stripTags$1(text, $) {
- // Wrapping text in html element prevents errors when text
- // has no html
- var cleanText = $('' + text + '').text();
- return cleanText === '' ? text : cleanText;
-}
-
-function withinComment$1($node) {
- var parents = $node.parents().toArray();
- var commentParent = parents.find(function (parent) {
- var classAndId = parent.attribs.class + ' ' + parent.attribs.id;
- return classAndId.includes('comment');
- });
-
- return commentParent !== undefined;
-}
-
-// Given a node, determine if it's article-like enough to return
-// param: node (a cheerio node)
-// return: boolean
-
-// DOM manipulation
+// # Pull out width/height if they were set.
+// img_width = None
+// img_height = None
+// if 'width' in img.attrib:
+// try:
+// img_width = float(img.get('width'))
+// except ValueError:
+// pass
+// if 'height' in img.attrib:
+// try:
+// img_height = float(img.get('height'))
+// except ValueError:
+// pass
+//
+// # Penalty for skinny images
+// if img_width and img_width <= 50:
+// logger.debug('Skinny image found. Subtracting 50.')
+// img_score -= 50
+//
+// # Penalty for short images
+// if img_height and img_height <= 50:
+// # Wide, short images are more common than narrow, tall ones
+// logger.debug('Short image found. Subtracting 25.')
+// img_score -= 25
+//
+// if img_width and img_height and not 'sprite' in img_path:
+// area = img_width * img_height
+//
+// if area < 5000: # Smaller than 50x100
+// logger.debug('Image with small area found. Subtracting 100.')
+// img_score -= 100
+// else:
+// img_score += round(area/1000.0)
+//
+// # If the image is higher on the page than other images,
+// # it gets a bonus. Penalty if lower.
+// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)
+// img_score += len(imgs)/2 - i
+//
+// # Use the raw src here because we munged img_path for case
+// # insensitivity
+// logger.debug('Final score is %d.', img_score)
+// img_scores[img.attrib['src']] += img_score
+//
+// top_score = 0
+// top_url = None
+// for (url, score) in img_scores.items():
+// if score > top_score:
+// top_url = url
+// top_score = score
+//
+// if top_score > 0:
+// logger.debug('Using top score image from content. Score was %d', top_score)
+// return top_url
+//
+//
+// # If nothing else worked, check to see if there are any really
+// # probable nodes in the doc, like .
+// logger.debug('Trying to find lead image in probable nodes')
+// for selector in constants.LEAD_IMAGE_URL_SELECTORS:
+// nodes = self.resource.extract_by_selector(selector)
+// for node in nodes:
+// clean_value = None
+// if node.attrib.get('src'):
+// clean_value = self.clean(node.attrib['src'])
+//
+// if not clean_value and node.attrib.get('href'):
+// clean_value = self.clean(node.attrib['href'])
+//
+// if not clean_value and node.attrib.get('value'):
+// clean_value = self.clean(node.attrib['value'])
+//
+// if clean_value:
+// logger.debug('Found lead image in probable nodes.')
+// logger.debug('Node was: %s', node)
+// return clean_value
+//
+// return None
+function scoreSimilarity(score,articleUrl,href){// Do this last and only if we have a real candidate, because it's
+// potentially expensive computationally. Compare the link to this
+// URL using difflib to get the % similarity of these URLs. On a
+// sliding scale, subtract points from this link based on
+// similarity.
+if(score>0){var similarity=new difflib$1.SequenceMatcher(null,articleUrl,href).ratio();// Subtract .1 from diff_percent when calculating modifier,
+// which means that if it's less than 10% different, we give a
+// bonus instead. Ex:
+// 3% different = +17.5 points
+// 10% different = 0 points
+// 20% different = -25 points
+var diffPercent=1.0-similarity;var diffModifier=-(250*(diffPercent-0.2));return score+diffModifier;}return 0;}function scoreLinkText(linkText,pageNum){// If the link text can be parsed as a number, give it a minor
+// bonus, with a slight bias towards lower numbered pages. This is
+// so that pages that might not have 'next' in their text can still
+// get scored, and sorted properly by score.
+var score=0;if(IS_DIGIT_RE$1.test(linkText.trim())){var linkTextAsNum=parseInt(linkText,10);// If it's the first page, we already got it on the first call.
+// Give it a negative score. Otherwise, up to page 10, give a
+// small bonus.
+if(linkTextAsNum<2){score=-30;}else{score=Math.max(0,10-linkTextAsNum);}// If it appears that the current page number is greater than
+// this links page number, it's a very bad sign. Give it a big
+// penalty.
+if(pageNum&&pageNum>=linkTextAsNum){score-=50;}}return score;}function scorePageInLink(pageNum,isWp){// page in the link = bonus. Intentionally ignore wordpress because
+// their ?p=123 link style gets caught by this even though it means
+// separate documents entirely.
+if(pageNum&&!isWp){return 50;}return 0;}var DIGIT_RE$2=/\d/;// A list of words that, if found in link text or URLs, likely mean that
+// this link is not a next page link.
+var EXTRANEOUS_LINK_HINTS$1=['print','archive','comment','discuss','e-mail','email','share','reply','all','login','sign','single','adx','entry-unrelated'];var EXTRANEOUS_LINK_HINTS_RE$1=new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'),'i');// Match any link text/classname/id that looks like it could mean the next
+// page. Things like: next, continue, >, >>, » but not >|, »| as those can
+// mean last page.
+var NEXT_LINK_TEXT_RE$1=new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))','i');// Match any link text/classname/id that looks like it is an end link: things
+// like "first", "last", "end", etc.
+var CAP_LINK_TEXT_RE$1=new RegExp('(first|last|end)','i');// Match any link text/classname/id that looks like it means the previous
+// page.
+var PREV_LINK_TEXT_RE$1=new RegExp('(prev|earl|old|new|<|«)','i');// Match any phrase that looks like it could be page, or paging, or pagination
+function scoreExtraneousLinks(href){// If the URL itself contains extraneous values, give a penalty.
+if(EXTRANEOUS_LINK_HINTS_RE$1.test(href)){return-25;}return 0;}function makeSig$1($link){return($link.attr('class')||'')+' '+($link.attr('id')||'');}function scoreByParents$1($link){// If a parent node contains paging-like classname or id, give a
+// bonus. Additionally, if a parent_node contains bad content
+// (like 'sponsor'), give a penalty.
+var $parent=$link.parent();var positiveMatch=false;var negativeMatch=false;var score=0;_Array$from(range(0,4)).forEach(function(){if($parent.length===0){return;}var parentData=makeSig$1($parent,' ');// If we have 'page' or 'paging' in our data, that's a good
+// sign. Add a bonus.
+if(!positiveMatch&&PAGE_RE$1.test(parentData)){positiveMatch=true;score+=25;}// If we have 'comment' or something in our data, and
+// we don't have something like 'content' as well, that's
+// a bad sign. Give a penalty.
+if(!negativeMatch&&NEGATIVE_SCORE_RE$2.test(parentData)&&EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)){if(!POSITIVE_SCORE_RE$2.test(parentData)){negativeMatch=true;score-=25;}}$parent=$parent.parent();});return score;}function scorePrevLink(linkData){// If the link has something like "previous", its definitely
+// an old link, skip it.
+if(PREV_LINK_TEXT_RE$1.test(linkData)){return-200;}return 0;}function shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls){// skip if we've already fetched this url
+if(previousUrls.find(function(url){return href===url;})!==undefined){return false;}// If we've already parsed this URL, or the URL matches the base
+// URL, or is empty, skip it.
+if(!href||href===articleUrl||href===baseUrl){return false;}var hostname=parsedUrl.hostname;var _URL$parse=URL$1.parse(href),linkHost=_URL$parse.hostname;// Domain mismatch.
+if(linkHost!==hostname){return false;}// If href doesn't contain a digit after removing the base URL,
+// it's certainly not the next page.
+var fragment=href.replace(baseUrl,'');if(!DIGIT_RE$2.test(fragment)){return false;}// This link has extraneous content (like "comment") in its link
+// text, so we skip it.
+if(EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)){return false;}// Next page link text is never long, skip if it is too long.
+if(linkText.length>25){return false;}return true;}function scoreBaseUrl(href,baseRegex){// If the baseUrl isn't part of this URL, penalize this
+// link. It could still be the link, but the odds are lower.
+// Example:
+// http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
+if(!baseRegex.test(href)){return-25;}return 0;}function scoreNextLinkText(linkData){// Things like "next", ">>", etc.
+if(NEXT_LINK_TEXT_RE$1.test(linkData)){return 50;}return 0;}function scoreCapLinks(linkData){// Cap links are links like "last", etc.
+if(CAP_LINK_TEXT_RE$1.test(linkData)){// If we found a link like "last", but we've already seen that
+// this link is also "next", it's fine. If it's not been
+// previously marked as "next", then it's probably bad.
+// Penalize.
+if(NEXT_LINK_TEXT_RE$1.test(linkData)){return-65;}}return 0;}function makeBaseRegex(baseUrl){return new RegExp('^'+baseUrl,'i');}function makeSig($link,linkText){return(linkText||$link.text())+' '+($link.attr('class')||'')+' '+($link.attr('id')||'');}function scoreLinks(_ref){var links=_ref.links,articleUrl=_ref.articleUrl,baseUrl=_ref.baseUrl,parsedUrl=_ref.parsedUrl,$=_ref.$,_ref$previousUrls=_ref.previousUrls,previousUrls=_ref$previousUrls===undefined?[]:_ref$previousUrls;parsedUrl=parsedUrl||URL$1.parse(articleUrl);var baseRegex=makeBaseRegex(baseUrl);var isWp=isWordpress$1($);// Loop through all links, looking for hints that they may be next-page
+// links. Things like having "page" in their textContent, className or
+// id, or being a child of a node with a page-y className or id.
+//
+// After we do that, assign each page a score, and pick the one that
+// looks most like the next page link, as long as its score is strong
+// enough to have decent confidence.
+var scoredPages=links.reduce(function(possiblePages,link){// Remove any anchor data since we don't do a good job
+// standardizing URLs (it's hard), we're going to do
+// some checking with and without a trailing slash
+var attrs=getAttrs$1(link);var href=removeAnchor$1(attrs.href);var $link=$(link);var linkText=$link.text();if(!shouldScore(href,articleUrl,baseUrl,parsedUrl,linkText,previousUrls)){return possiblePages;}// ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
+if(!possiblePages[href]){possiblePages[href]={score:0,linkText:linkText,href:href};}else{possiblePages[href].linkText=possiblePages[href].linkText+'|'+linkText;}var possiblePage=possiblePages[href];var linkData=makeSig($link,linkText);var pageNum=pageNumFromUrl$1(href);var score=scoreBaseUrl(href,baseRegex);score+=scoreNextLinkText(linkData);score+=scoreCapLinks(linkData);score+=scorePrevLink(linkData);score+=scoreByParents$1($link);score+=scoreExtraneousLinks(href);score+=scorePageInLink(pageNum,isWp);score+=scoreLinkText(linkText,pageNum);score+=scoreSimilarity(score,articleUrl,href);possiblePage.score=score;return possiblePages;},{});return _Reflect$ownKeys$1(scoredPages).length===0?null:scoredPages;}/* eslint-disable */// Looks for and returns next page url
+// for multi-page articles
+var GenericNextPageUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,parsedUrl=_ref.parsedUrl,_ref$previousUrls=_ref.previousUrls,previousUrls=_ref$previousUrls===undefined?[]:_ref$previousUrls;parsedUrl=parsedUrl||URL$1.parse(url);var articleUrl=removeAnchor$1(url);var baseUrl=articleBaseUrl$1(url,parsedUrl);var links=$('a[href]').toArray();var scoredLinks=scoreLinks({links:links,articleUrl:articleUrl,baseUrl:baseUrl,parsedUrl:parsedUrl,$:$,previousUrls:previousUrls});// If no links were scored, return null
+if(!scoredLinks)return null;// now that we've scored all possible pages,
+// find the biggest one.
+var topPage=_Reflect$ownKeys$1(scoredLinks).reduce(function(acc,link){var scoredLink=scoredLinks[link];return scoredLink.score>acc.score?scoredLink:acc;},{score:-100});// If the score is less than 50, we're not confident enough to use it,
+// so we fail.
+if(topPage.score>=50){return topPage.href;}return null;}};var CANONICAL_META_SELECTORS=['og:url'];function parseDomain(url){var parsedUrl=URL$1.parse(url);var hostname=parsedUrl.hostname;return hostname;}function result(url){return{url:url,domain:parseDomain(url)};}var GenericUrlExtractor={extract:function extract(_ref){var $=_ref.$,url=_ref.url,metaCache=_ref.metaCache;var $canonical=$('link[rel=canonical]');if($canonical.length!==0){var href=$canonical.attr('href');if(href){return result(href);}}var metaUrl=extractFromMeta$$1($,CANONICAL_META_SELECTORS,metaCache);if(metaUrl){return result(metaUrl);}return result(url);}};var EXCERPT_META_SELECTORS=['og:description','twitter:description'];function clean$2(content,$){var maxLength=arguments.length>2&&arguments[2]!==undefined?arguments[2]:200;content=content.replace(/[\s\n]+/g,' ').trim();return ellipsize$1(content,maxLength,{ellipse:'…'});}var GenericExcerptExtractor={extract:function extract(_ref){var $=_ref.$,content=_ref.content,metaCache=_ref.metaCache;var excerpt=extractFromMeta$$1($,EXCERPT_META_SELECTORS,metaCache);if(excerpt){return clean$2(stripTags$1(excerpt,$));}// Fall back to excerpting from the extracted content
+var maxLength=200;var shortContent=content.slice(0,maxLength*5);return clean$2($(shortContent).text(),$,maxLength);}};var GenericWordCountExtractor={extract:function extract(_ref){var content=_ref.content;var $=cheerio$1.load(content);var $content=$('div').first();var text=normalizeSpaces$1($content.text());return text.split(/\s/).length;}};var GenericExtractor={// This extractor is the default for all domains
+domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:function direction(_ref){var title=_ref.title;return stringDirection$1.getDirection(title);},extract:function extract(options){var html=options.html,cheerio$$1=options.cheerio,$=options.$;if(html&&!$){var loaded=cheerio$$1.load(html);options.$=loaded;}var title=this.title(options);var date_published=this.date_published(options);var author=this.author(options);var content=this.content(_extends$1({},options,{title:title}));var lead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));var dek=this.dek(_extends$1({},options,{content:content}));var next_page_url=this.next_page_url(options);var excerpt=this.excerpt(_extends$1({},options,{content:content}));var word_count=this.word_count(_extends$1({},options,{content:content}));var direction=this.direction({title:title});var _url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};function getExtractor(url,parsedUrl){parsedUrl=parsedUrl||URL$1.parse(url);var _parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;var baseDomain=hostname.split('.').slice(-2).join('.');return Extractors[hostname]||Extractors[baseDomain]||GenericExtractor;}/* eslint-disable */// Remove elements by an array of selectors
+function cleanBySelectors($content,$,_ref){var clean=_ref.clean;if(!clean)return $content;$(clean.join(','),$content).remove();return $content;}// Transform matching elements
+function transformElements($content,$,_ref2){var transforms=_ref2.transforms;if(!transforms)return $content;_Reflect$ownKeys$1(transforms).forEach(function(key){var $matches=$(key,$content);var value=transforms[key];// If value is a string, convert directly
+if(typeof value==='string'){$matches.each(function(index,node){convertNodeTo$$1($(node),$,transforms[key]);});}else if(typeof value==='function'){// If value is function, apply function to node
+$matches.each(function(index,node){var result=value($(node),$);// If function returns a string, convert node to that value
+if(typeof result==='string'){convertNodeTo$$1($(node),$,result);}});}});return $content;}function findMatchingSelector($,selectors){return selectors.find(function(selector){if(Array.isArray(selector)){var _selector=_slicedToArray$1(selector,2),s=_selector[0],attr=_selector[1];return $(s).length===1&&$(s).attr(attr)&&$(s).attr(attr).trim()!=='';}// debugger
+return $(selector).length===1&&$(selector).text().trim()!=='';});}function select(opts){var $=opts.$,type=opts.type,extractionOpts=opts.extractionOpts,_opts$extractHtml=opts.extractHtml,extractHtml=_opts$extractHtml===undefined?false:_opts$extractHtml;// Skip if there's not extraction for this type
+if(!extractionOpts)return null;// If a string is hardcoded for a type (e.g., Wikipedia
+// contributors), return the string
+if(typeof extractionOpts==='string')return extractionOpts;var selectors=extractionOpts.selectors,_extractionOpts$defau=extractionOpts.defaultCleaner,defaultCleaner=_extractionOpts$defau===undefined?true:_extractionOpts$defau;var matchingSelector=findMatchingSelector($,selectors);if(!matchingSelector)return null;// Declaring result; will contain either
+// text or html, which will be cleaned
+// by the appropriate cleaner type
+// If the selector type requests html as its return type
+// transform and clean the element with provided selectors
+if(extractHtml){var $content=$(matchingSelector);// Wrap in div so transformation can take place on root element
+$content.wrap($(''));$content=$content.parent();$content=transformElements($content,$,extractionOpts);$content=cleanBySelectors($content,$,extractionOpts);$content=Cleaners[type]($content,_extends$1({},opts,{defaultCleaner:defaultCleaner}));return $.html($content);}var result=void 0;// if selector is an array (e.g., ['img', 'src']),
+// extract the attr
+if(Array.isArray(matchingSelector)){var _matchingSelector=_slicedToArray$1(matchingSelector,2),selector=_matchingSelector[0],attr=_matchingSelector[1];result=$(selector).attr(attr).trim();}else{result=$(matchingSelector).text().trim();}// Allow custom extractor to skip default cleaner
+// for this type; defaults to true
+if(defaultCleaner){return Cleaners[type](result,opts);}return result;}function extractResult(opts){var type=opts.type,extractor=opts.extractor,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;var result=select(_extends$1({},opts,{extractionOpts:extractor[type]}));// If custom parser succeeds, return the result
+if(result){return result;}// If nothing matches the selector, and fallback is enabled,
+// run the Generic extraction
+if(fallback)return GenericExtractor[type](opts);return null;}var RootExtractor={extract:function extract(){var extractor=arguments.length>0&&arguments[0]!==undefined?arguments[0]:GenericExtractor;var opts=arguments[1];var _opts=opts,contentOnly=_opts.contentOnly,extractedTitle=_opts.extractedTitle;// This is the generic extractor. Run its extract method
+if(extractor.domain==='*')return extractor.extract(opts);opts=_extends$1({},opts,{extractor:extractor});if(contentOnly){var _content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:extractedTitle}));return{content:_content};}var title=extractResult(_extends$1({},opts,{type:'title'}));var date_published=extractResult(_extends$1({},opts,{type:'date_published'}));var author=extractResult(_extends$1({},opts,{type:'author'}));var next_page_url=extractResult(_extends$1({},opts,{type:'next_page_url'}));var content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:title}));var lead_image_url=extractResult(_extends$1({},opts,{type:'lead_image_url',content:content}));var excerpt=extractResult(_extends$1({},opts,{type:'excerpt',content:content}));var dek=extractResult(_extends$1({},opts,{type:'dek',content:content,excerpt:excerpt}));var word_count=extractResult(_extends$1({},opts,{type:'word_count',content:content}));var direction=extractResult(_extends$1({},opts,{type:'direction',title:title}));var _ref3=extractResult(_extends$1({},opts,{type:'url_and_domain'}))||{url:null,domain:null},url=_ref3.url,domain=_ref3.domain;return{title:title,content:content,author:author,date_published:date_published,lead_image_url:lead_image_url,dek:dek,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};var collectAllPages=function(){var _ref=_asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2){var next_page_url=_ref2.next_page_url,html=_ref2.html,$=_ref2.$,metaCache=_ref2.metaCache,result=_ref2.result,Extractor=_ref2.Extractor,title=_ref2.title,url=_ref2.url,cheerio$$1=_ref2.cheerio;var pages,previousUrls,extractorOpts,nextPageResult,word_count;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:// At this point, we've fetched just the first page
+pages=1;previousUrls=[removeAnchor$1(url)];// If we've gone over 26 pages, something has
+// likely gone wrong.
+case 2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;return Resource.create(next_page_url);case 6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls,cheerio:cheerio$$1};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:'\n '+result.content+'\n \n
'});return _context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case 17:case'end':return _context.stop();}}},_callee,this);}));function collectAllPages(_x){return _ref.apply(this,arguments);}return collectAllPages;}();var Mercury={parse:function parse(url,html){var _this=this;var opts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return _asyncToGenerator(_regeneratorRuntime.mark(function _callee(){var _opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,Extractor,$,metaCache,result,_result,title,next_page_url;return _regeneratorRuntime.wrap(function _callee$(_context){while(1){switch(_context.prev=_context.next){case 0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=4;break;}return _context.abrupt('return',Errors.badUrl);case 4:Extractor=getExtractor(url,parsedUrl);// console.log(`Using extractor for ${Extractor.domain}`);
+_context.next=7;return Resource.create(url,html,parsedUrl);case 7:$=_context.sent;if(!$.error){_context.next=10;break;}return _context.abrupt('return',$);case 10:html=$.html();// Cached value of every meta name in our document.
+// Used when extracting title/author/date_published/dek
+metaCache=$('meta').map(function(_,node){return $(node).attr('name');}).toArray();result=RootExtractor.extract(Extractor,{url:url,html:html,$:$,metaCache:metaCache,parsedUrl:parsedUrl,fallback:fallback,cheerio:cheerio$1});_result=result,title=_result.title,next_page_url=_result.next_page_url;// Fetch more pages if next_page_url found
+if(!(fetchAllPages&&next_page_url)){_context.next=20;break;}_context.next=17;return collectAllPages({Extractor:Extractor,next_page_url:next_page_url,html:html,$:$,metaCache:metaCache,result:result,title:title,url:url,cheerio:cheerio$1});case 17:result=_context.sent;_context.next=21;break;case 20:result=_extends$1({},result,{total_pages:1,rendered_pages:1});case 21:return _context.abrupt('return',result);case 22:case'end':return _context.stop();}}},_callee,_this);}))();},// A convenience method for getting a resource
+// to work with, e.g., for custom extractor generator
+fetchResource:function fetchResource(url){var _this2=this;return _asyncToGenerator(_regeneratorRuntime.mark(function _callee2(){return _regeneratorRuntime.wrap(function _callee2$(_context2){while(1){switch(_context2.prev=_context2.next){case 0:_context2.next=2;return Resource.create(url);case 2:return _context2.abrupt('return',_context2.sent);case 3:case'end':return _context2.stop();}}},_callee2,_this2);}))();}};var mercury=Mercury;
function insertValues(strings) {
for (var _len = arguments.length, values = Array(_len > 1 ? _len - 1 : 0), _key = 1; _key < _len; _key++) {
@@ -2135,7 +2216,7 @@ function testFor(key, value, dir, file, url) {
return k === key;
})) return '';
- return template(_templateObject$1, key, key, dir, file, url, key, key, value ? "`" + value + "`" : "''");
+ return template(_templateObject$1, key, key, dir, file, url, key, key, value ? '`' + value + '`' : "''");
}
var extractorTestTemplate = function (file, url, dir, result, name) {
@@ -2144,6 +2225,9 @@ var extractorTestTemplate = function (file, url, dir, result, name) {
}).join('\n\n'), dir, file, url);
};
+/* eslint-disable import/no-extraneous-dependencies */
+/* eslint-disable no-use-before-define */
+/* eslint-disable no-console */
var questions = [{
type: 'input',
name: 'website',
@@ -2157,16 +2241,12 @@ var questions = [{
return false;
}
}];
-
-inquirer.prompt(questions).then(function (answers) {
- scaffoldCustomParser(answers.website);
-});
-
var spinner = void 0;
+
function confirm(fn, args, msg, newParser) {
spinner = ora({ text: msg });
spinner.start();
- var result = fn.apply(null, args);
+ var result = fn.apply(undefined, _toConsumableArray(args));
if (result && result.then) {
result.then(function (r) {
@@ -2179,19 +2259,66 @@ function confirm(fn, args, msg, newParser) {
return result;
}
+function confirmCreateDir(dir, msg) {
+ if (!fs.existsSync(dir)) {
+ confirm(fs.mkdirSync, [dir], msg);
+ }
+}
+
+function getDir(url) {
+ var _URL$parse2 = URL.parse(url),
+ hostname = _URL$parse2.hostname;
+
+ return './src/extractors/custom/' + hostname;
+}
+
+function scaffoldCustomParser(url) {
+ var dir = getDir(url);
+
+ var _URL$parse3 = URL.parse(url),
+ hostname = _URL$parse3.hostname;
+
+ var newParser = false;
+
+ if (!fs.existsSync(dir)) {
+ newParser = true;
+ confirmCreateDir(dir, 'Creating ' + hostname + ' directory');
+ confirmCreateDir('./fixtures/' + hostname, 'Creating fixtures directory');
+ }
+
+ confirm(mercury.fetchResource, [url], 'Fetching fixture', newParser);
+}
+
+inquirer.prompt(questions).then(function (answers) {
+ scaffoldCustomParser(answers.website);
+});
+
+function generateScaffold(url, file, result) {
+ var _URL$parse4 = URL.parse(url),
+ hostname = _URL$parse4.hostname;
+
+ var extractor = extractorTemplate(hostname, extractorName(hostname));
+ var extractorTest = extractorTestTemplate(file, url, getDir(url), result, extractorName(hostname));
+
+ fs.writeFileSync(getDir(url) + '/index.js', extractor);
+ fs.writeFileSync(getDir(url) + '/index.test.js', extractorTest);
+ fs.appendFileSync('./src/extractors/custom/index.js', exportString(url));
+ child_process.exec('npm run lint-fix-quiet -- ' + getDir(url) + '/*.js');
+}
+
function savePage($, _ref, newParser) {
var _ref2 = _slicedToArray(_ref, 1),
url = _ref2[0];
- var _URL$parse2 = URL.parse(url),
- hostname = _URL$parse2.hostname;
+ var _URL$parse5 = URL.parse(url),
+ hostname = _URL$parse5.hostname;
spinner.succeed();
var filename = new Date().getTime();
var file = './fixtures/' + hostname + '/' + filename + '.html';
// fix http(s) relative links:
- makeLinksAbsolute$1($('*').first(), $, url);
+ makeLinksAbsolute$$1($('*').first(), $, url);
$('[src], [href]').each(function (index, node) {
var $node = $(node);
var link = $node.attr('src');
@@ -2199,11 +2326,11 @@ function savePage($, _ref, newParser) {
$node.attr('src', 'http:' + link);
}
});
- var html = stripJunkTags$1($('*').first(), $, ['script']).html();
+ var html = stripJunkTags($('*').first(), $, ['script']).html();
fs.writeFileSync(file, html);
- var result = mercury.parse(url, html).then(function (result) {
+ mercury.parse(url, html).then(function (result) {
if (newParser) {
confirm(generateScaffold, [url, file, result], 'Generating parser and tests');
console.log('Your custom site extractor has been set up. To get started building it, run\n yarn watch:test -- ' + hostname + '\n -- OR --\n npm run watch:test -- ' + hostname);
@@ -2213,17 +2340,11 @@ function savePage($, _ref, newParser) {
});
}
-function generateScaffold(url, file, result) {
- var _URL$parse3 = URL.parse(url),
- hostname = _URL$parse3.hostname;
-
- var extractor = extractorTemplate(hostname, extractorName(hostname));
- var extractorTest = extractorTestTemplate(file, url, getDir(url), result, extractorName(hostname));
+function exportString(url) {
+ var _URL$parse6 = URL.parse(url),
+ hostname = _URL$parse6.hostname;
- fs.writeFileSync(getDir(url) + '/index.js', extractor);
- fs.writeFileSync(getDir(url) + '/index.test.js', extractorTest);
- fs.appendFileSync('./src/extractors/custom/index.js', exportString(url));
- child_process.exec('npm run lint-fix-quiet -- ' + getDir(url) + '/*.js');
+ return 'export * from \'./' + hostname + '\';';
}
function extractorName(hostname) {
@@ -2232,41 +2353,4 @@ function extractorName(hostname) {
}).join('');
return name + 'Extractor';
}
-
-function exportString(url) {
- var _URL$parse4 = URL.parse(url),
- hostname = _URL$parse4.hostname;
-
- return 'export * from \'./' + hostname + '\';';
-}
-
-function confirmCreateDir(dir, msg) {
- if (!fs.existsSync(dir)) {
- confirm(fs.mkdirSync, [dir], msg);
- }
-}
-
-function scaffoldCustomParser(url) {
- var dir = getDir(url);
-
- var _URL$parse5 = URL.parse(url),
- hostname = _URL$parse5.hostname;
-
- var newParser = false;
-
- if (!fs.existsSync(dir)) {
- newParser = true;
- confirmCreateDir(dir, 'Creating ' + hostname + ' directory');
- confirmCreateDir('./fixtures/' + hostname, 'Creating fixtures directory');
- }
-
- confirm(mercury.fetchResource, [url], 'Fetching fixture', newParser);
-}
-
-function getDir(url) {
- var _URL$parse6 = URL.parse(url),
- hostname = _URL$parse6.hostname;
-
- return './src/extractors/custom/' + hostname;
-}
//# sourceMappingURL=generate-custom-parser.js.map
diff --git a/dist/generate-custom-parser.js.map b/dist/generate-custom-parser.js.map
index 31686e55..cbdaecf0 100644
--- a/dist/generate-custom-parser.js.map
+++ b/dist/generate-custom-parser.js.map
@@ -1 +1 @@
-{"version":3,"file":null,"sources":["mercury.js","../src/utils/dom/constants.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-attributes.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/article-base-url.js","../src/utils/text/has-sentence-end.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/index.js","../src/utils/dom/clean-tags.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/index.js","../scripts/templates/insert-values.js","../scripts/templates/index.js","../scripts/templates/custom-extractor.js","../scripts/templates/custom-extractor-test.js","../scripts/generate-custom-parser.js"],"sourcesContent":["'use strict';\n\nfunction _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? 
ex['default'] : ex; }\n\nvar _regeneratorRuntime = _interopDefault(require('babel-runtime/regenerator'));\nvar _extends = _interopDefault(require('babel-runtime/helpers/extends'));\nvar _asyncToGenerator = _interopDefault(require('babel-runtime/helpers/asyncToGenerator'));\nvar URL = _interopDefault(require('url'));\nvar cheerio = _interopDefault(require('cheerio'));\nvar _Promise = _interopDefault(require('babel-runtime/core-js/promise'));\nvar request = _interopDefault(require('request'));\nvar _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));\nvar _Object$keys = _interopDefault(require('babel-runtime/core-js/object/keys'));\nvar _toConsumableArray = _interopDefault(require('babel-runtime/helpers/toConsumableArray'));\nvar _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));\nvar stringDirection = _interopDefault(require('string-direction'));\nvar _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));\nvar _defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));\nvar _typeof = _interopDefault(require('babel-runtime/helpers/typeof'));\nvar validUrl = _interopDefault(require('valid-url'));\nvar moment = _interopDefault(require('moment'));\nvar wuzzy = _interopDefault(require('wuzzy'));\nvar difflib = _interopDefault(require('difflib'));\nvar _Array$from = _interopDefault(require('babel-runtime/core-js/array/from'));\nvar ellipsize = _interopDefault(require('ellipsize'));\n\nvar _marked = [range].map(_regeneratorRuntime.mark);\n\nfunction range() {\n var start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;\n var end = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : 1;\n return _regeneratorRuntime.wrap(function range$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n if (!(start <= end)) {\n _context.next = 5;\n break;\n }\n\n _context.next = 3;\n return start += 1;\n\n case 3:\n _context.next = 0;\n break;\n\n case 5:\n case \"end\":\n return _context.stop();\n }\n }\n }, _marked[0], this);\n}\n\n// extremely simple url validation as a first step\nfunction validateUrl(_ref) {\n var hostname = _ref.hostname;\n\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n\nvar Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.'\n }\n};\n\nvar REQUEST_HEADERS = {\n 'User-Agent': 'Readability - http://readability.com/about/'\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nvar FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nvar BAD_CONTENT_TYPES = ['audio/mpeg', 'image/gif', 'image/jpeg', 'image/jpg'];\n\nvar BAD_CONTENT_TYPES_RE = new RegExp('^(' + BAD_CONTENT_TYPES.join('|') + ')$', 'i');\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. Defaults to 5 MB.\nvar MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\n\nfunction get(options) {\n return new _Promise(function (resolve, reject) {\n request(options, function (err, response, body) {\n if (err) {\n reject(err);\n } else {\n resolve({ body: body, response: response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. 
Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nfunction validateResponse(response) {\n var parseNon2xx = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;\n\n // Check if we got a valid status code\n if (response.statusMessage !== 'OK') {\n if (!response.statusCode) {\n throw new Error('Unable to fetch content. Original exception was ' + response.error);\n } else if (!parseNon2xx) {\n throw new Error('Resource returned a response status code of ' + response.statusCode + ' and resource was instructed to reject non-2xx level status codes.');\n }\n }\n\n var _response$headers = response.headers,\n contentType = _response$headers['content-type'],\n contentLength = _response$headers['content-length'];\n\n // Check that the content is not in BAD_CONTENT_TYPES\n\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error('Content-type for this resource was ' + contentType + ' and is not allowed.');\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error('Content for this resource was too large. Maximum content length is ' + MAX_CONTENT_LENGTH + '.');\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\n\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nvar fetchResource$1 = (function () {\n var _ref2 = _asyncToGenerator(_regeneratorRuntime.mark(function _callee(url, parsedUrl) {\n var options, _ref3, response, body;\n\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n\n options = {\n url: parsedUrl,\n headers: _extends({}, REQUEST_HEADERS),\n timeout: FETCH_TIMEOUT,\n // Don't set encoding; fixes issues\n // w/gzipped responses\n encoding: null,\n // Accept cookies\n jar: true,\n // Accept and decode gzip\n gzip: true,\n // Follow any redirect\n followAllRedirects: true\n };\n _context.next = 4;\n return get(options);\n\n case 4:\n _ref3 = _context.sent;\n response = _ref3.response;\n body = _ref3.body;\n _context.prev = 7;\n\n validateResponse(response);\n return _context.abrupt('return', { body: body, response: response });\n\n case 12:\n _context.prev = 12;\n _context.t0 = _context['catch'](7);\n return _context.abrupt('return', Errors.badUrl);\n\n case 15:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, this, [[7, 12]]);\n }));\n\n function fetchResource(_x2, _x3) {\n return _ref2.apply(this, arguments);\n }\n\n return fetchResource;\n})();\n\nfunction convertMetaProp($, from, to) {\n $('meta[' + from + ']').each(function (_, node) {\n var $node = $(node);\n\n var value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. 
See, e.g., og or twitter meta tags.\n\nfunction normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n\nvar IS_LINK = new RegExp('https?://', 'i');\nvar IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nvar TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');\n\n// Convert all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nfunction convertLazyLoadedImages($) {\n $('img').each(function (_, img) {\n _Reflect$ownKeys(img.attribs).forEach(function (attr) {\n var value = img.attribs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) && IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root().find('*').contents().filter(isComment).remove();\n\n return $;\n}\n\nfunction clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n\nvar Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. 
Expects a\n // string.\n create: function create(url, preparedResponse, parsedUrl) {\n var _this = this;\n\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {\n var result, validResponse;\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n result = void 0;\n\n if (!preparedResponse) {\n _context.next = 6;\n break;\n }\n\n validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500\n }\n };\n\n\n result = { body: preparedResponse, response: validResponse };\n _context.next = 9;\n break;\n\n case 6:\n _context.next = 8;\n return fetchResource$1(url, parsedUrl);\n\n case 8:\n result = _context.sent;\n\n case 9:\n if (!result.error) {\n _context.next = 11;\n break;\n }\n\n return _context.abrupt('return', result);\n\n case 11:\n return _context.abrupt('return', _this.generateDoc(result));\n\n case 12:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, _this);\n }))();\n },\n generateDoc: function generateDoc(_ref) {\n var content = _ref.body,\n response = _ref.response;\n var contentType = response.headers['content-type'];\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n\n if (!contentType.includes('html') && !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n var $ = cheerio.load(content, { normalizeWhitespace: true });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n }\n};\n\nvar merge = function merge(extractor, domains) {\n return domains.reduce(function (acc, domain) {\n acc[domain] = extractor;\n return acc;\n }, {});\n};\n\nfunction 
mergeSupportedDomains(extractor) {\n return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);\n}\n\nvar BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: ['.post-content noscript'],\n\n // Selectors to remove from the extracted content\n clean: [],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div'\n }\n },\n\n author: {\n selectors: ['.post-author-name']\n },\n\n title: {\n selectors: ['.post h2.title']\n },\n\n date_published: {\n selectors: ['span.publishdate']\n }\n};\n\nvar NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: ['div.article-content', 'section.body', 'article.article'],\n\n // Selectors to remove from the extracted content\n clean: ['.ad', '.single-related-story'],\n\n // Object of tranformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: function noscript($node) {\n var $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n\n return null;\n }\n }\n },\n\n title: {\n selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']\n },\n\n author: {\n selectors: ['.by-authors', '.lede-feature-author']\n },\n\n dek: {\n selectors: ['.lede-feature-teaser']\n },\n\n date_published: {\n selectors: [['time.article-timestamp[datetime]', 'datetime'], 'time.article-timestamp']\n }\n};\n\nvar WikipediaExtractor = {\n 
domain: 'wikipedia.org',\n content: {\n selectors: ['#mw-content-text'],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': function infoboxImg($node) {\n var $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure'\n },\n\n // Selectors to remove from the extracted content\n clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc', '.navbox']\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: ['h2.title']\n },\n\n date_published: {\n selectors: ['#footer-info-lastmod']\n }\n\n};\n\nvar TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': function permalinkRoleMain($node, $) {\n var tweets = $node.find('.tweet');\n var $tweetContainer = $('');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with s, which\n // renders as a strikethrough\n s: 'span'\n },\n\n selectors: ['.permalink[role=main]'],\n\n defaultCleaner: false,\n\n clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']\n },\n\n author: {\n selectors: ['.tweet.permalink-tweet .username']\n },\n\n date_published: {\n selectors: [['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms']]\n }\n\n};\n\nvar NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: ['.g-headline', 'h1.headline']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value'], '.g-byline', '.byline']\n },\n\n content: {\n selectors: ['div.g-blocks', 'article#story'],\n\n defaultCleaner: false,\n\n transforms: {\n 
'img.g-lazy': function imgGLazy($node) {\n var src = $node.attr('src');\n // const widths = $node.attr('data-widths')\n // .slice(1)\n // .slice(0, -1)\n // .split(',');\n // if (widths.length) {\n // width = widths.slice(-1);\n // } else {\n // width = '900';\n // }\n var width = 640;\n\n src = src.replace('{{size}}', width);\n $node.attr('src', src);\n }\n },\n\n clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden']\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\nvar TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: ['h1.hed']\n },\n\n author: {\n selectors: ['article#article .article-cover-extra .metadata .byline a']\n },\n\n content: {\n selectors: ['.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: ['h1.title']\n },\n\n author: {\n selectors: ['.contributors']\n },\n\n content: {\n selectors: ['div#articleBody', 'div.articleBody'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: ['h1.post-title']\n },\n\n author: {\n selectors: ['a[rel=\"author\"]']\n },\n\n content: {\n selectors: ['article.content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.visually-hidden']\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: ['h1']\n },\n\n author: {\n selectors: ['span.authorname-txt']\n },\n\n content: {\n selectors: ['div.richtext'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['span.caption']\n },\n\n date_published: {\n selectors: ['span.time']\n },\n\n lead_image_url: {\n selectors: []\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: ['header.canvas-header']\n },\n\n author: {\n selectors: ['span.provider-name']\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.figure-caption']\n },\n\n date_published: {\n selectors: [['time.date[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: ['h1[id=\"post-title\"]']\n },\n\n author: {\n selectors: ['a[data-action=\"user/username\"]', 'byline__author']\n },\n\n content: {\n selectors: ['#buzz_sub_buzz'],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n h2: 'b'\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box']\n },\n\n date_published: {\n selectors: ['.buzz-datetime']\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: ['h1.entry-title']\n },\n\n author: {\n selectors: ['.author vcard', '.fn']\n },\n\n content: {\n selectors: ['.grid-content', '.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: ['h1.post-title']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']]\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro', '.content-wrapper'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value']]\n },\n\n author: {\n selectors: ['.story-main-content .byline .vcard']\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content', '.content-group', '.story-core', '.story-text'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['figcaption']\n },\n\n date_published: {\n selectors: [['.story-main-content .timestamp time[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\nvar DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: ['jezebel.com', 'lifehacker.com', 'kotaku.com', 'gizmodo.com', 'jalopnik.com', 'kinja.com'],\n\n title: {\n selectors: ['h1.headline']\n },\n\n author: {\n selectors: ['.author']\n },\n\n content: {\n selectors: ['.post-content', '.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': function iframeLazyloadDataRecommendIdYoutube($node) {\n var youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['time.updated[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [\n // enter selectors\n ]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: ['h1.article-title']\n },\n\n author: {\n selectors: ['span[itemprop=author]']\n },\n\n content: {\n selectors: ['div[itemprop=articlebody]'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[itemprop=datePublished]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: ['h1.headline']\n },\n\n author: {\n selectors: ['.PostByline__name']\n },\n\n content: {\n selectors: ['div.post__content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': function divDataRenderReactIdImagesLazyPicture($node, $) {\n var data = JSON.parse($node.attr('data-props'));\n var src = data.sources[0].src;\n\n var $img = $('').attr('src', src);\n $node.replaceWith($img);\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['.PostByline__timestamp[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=description]', 'value']]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\nvar MediumExtractor = {\n domain: 'medium.com',\n\n supportedDomains: ['trackchanges.postlight.com'],\n\n title: {\n selectors: ['h1']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']]\n },\n\n content: {\n selectors: ['.section-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: function iframe($node) {\n var ytRe = /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n var thumb = decodeURIComponent($node.attr('data-thumbnail'));\n\n if (ytRe.test(thumb)) {\n var _thumb$match = thumb.match(ytRe),\n _thumb$match2 = _slicedToArray(_thumb$match, 2),\n _ = _thumb$match2[0],\n youtubeId = _thumb$match2[1]; // eslint-disable-line\n\n\n $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);\n var $parent = $node.parents('figure');\n $parent.prepend($node.clone());\n $node.remove();\n }\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['time[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [\n // enter selectors\n ]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n\n\nvar CustomExtractors = Object.freeze({\n\tBloggerExtractor: BloggerExtractor,\n\tNYMagExtractor: NYMagExtractor,\n\tWikipediaExtractor: WikipediaExtractor,\n\tTwitterExtractor: TwitterExtractor,\n\tNYTimesExtractor: NYTimesExtractor,\n\tTheAtlanticExtractor: TheAtlanticExtractor,\n\tNewYorkerExtractor: NewYorkerExtractor,\n\tWiredExtractor: WiredExtractor,\n\tMSNExtractor: MSNExtractor,\n\tYahooExtractor: YahooExtractor,\n\tBuzzfeedExtractor: BuzzfeedExtractor,\n\tWikiaExtractor: WikiaExtractor,\n\tLittleThingsExtractor: LittleThingsExtractor,\n\tPoliticoExtractor: PoliticoExtractor,\n\tDeadspinExtractor: DeadspinExtractor,\n\tBroadwayWorldExtractor: BroadwayWorldExtractor,\n\tApartmentTherapyExtractor: ApartmentTherapyExtractor,\n\tMediumExtractor: MediumExtractor\n});\n\nvar Extractors = 
_Object$keys(CustomExtractors).reduce(function (acc, key) {\n var extractor = CustomExtractors[key];\n return _extends({}, acc, mergeSupportedDomains(extractor));\n}, {});\n\n// Spacer images to be removed\nvar SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nvar KEEP_CLASS = 'mercury-parser-keep';\n\nvar KEEP_SELECTORS = ['iframe[src^=\"https://www.youtube.com\"]', 'iframe[src^=\"http://www.youtube.com\"]', 'iframe[src^=\"https://player.vimeo\"]', 'iframe[src^=\"http://player.vimeo\"]'];\n\n// A list of tags to strip from the output if we encounter them.\nvar STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];\n\n// cleanAttributes\nvar REMOVE_ATTRS = ['style', 'align'];\nvar REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {\n return '[' + selector + ']';\n});\nvar REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nvar WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nvar WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');\n\n// removeEmpty\nvar REMOVE_EMPTY_TAGS = ['p'];\nvar REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {\n return tag + ':empty';\n}).join(',');\n\n// cleanTags\nvar CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nvar HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nvar HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nvar UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',\n// 'form', // This is too generic, has too many false positives\n'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.\n'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box\n'presence_control_external', // lifehacker.com container full of false positives\n'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nvar UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form\n'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nvar DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\n\n\n\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\n\n\n\n\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday\n'\\\\Bcopy'];\n\n// The above list, joined into a matching regular expression\nvar POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\n\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off\n'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright\n'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk\n'promo', 'pr_', // autoblog - press release\n'related', 'respond', 'roundcontent', // lifehacker restricted content warning\n'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];\n// The above list, joined into a matching regular expression\nvar NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. Not always successful.\nvar IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\n\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\n\n\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nvar PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\n\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\n\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\n\n\n// Match 2 or more consecutive tags\n\n\n// Match 1 BR tag.\n\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nvar BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];\nvar BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nvar candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nvar CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nvar candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nvar CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nfunction stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each(function (index, node) {\n var $node = $(node);\n var classes = $node.attr('class');\n var id = $node.attr('id');\n if (!id && !classes) return;\n\n var classAndId = (classes || '') + ' ' + (id || '');\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. - AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nfunction brsToPs$$1($) {\n var collapsing = false;\n $('br').each(function (index, element) {\n var nextElement = $(element).next().get(0);\n\n if (nextElement && nextElement.tagName === 'br') {\n collapsing = true;\n $(element).remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. 
does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nfunction paragraphize(node, $) {\n var br = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;\n\n var $node = $(node);\n\n if (br) {\n var sibling = node.nextSibling;\n var p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n var nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n\nfunction convertDivs($) {\n $('div').each(function (index, div) {\n var $div = $(div);\n var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each(function (index, span) {\n var $span = $(span);\n var convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. 
Returned just for convenience.)\n\nfunction convertToParagraphs$$1($) {\n $ = brsToPs$$1($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n\nfunction convertNodeTo($node, $) {\n var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';\n\n var node = $node.get(0);\n if (!node) {\n return $;\n }\n\n var _$node$get = $node.get(0),\n attribs = _$node$get.attribs;\n\n var attribString = _Reflect$ownKeys(attribs).map(function (key) {\n return key + '=' + attribs[key];\n }).join(' ');\n\n $node.replaceWith('<' + tag + ' ' + attribString + '>' + $node.contents() + '' + tag + '>');\n return $;\n}\n\nfunction cleanForHeight($img, $) {\n var height = parseInt($img.attr('height'), 10);\n var width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nfunction cleanImages($article, $) {\n $article.find('img').each(function (index, img) {\n var $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n\nfunction markToKeep(article, $, url) {\n var tags = arguments.length > 3 && arguments[3] !== undefined ? 
arguments[3] : [];\n\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n var _URL$parse = URL.parse(url),\n protocol = _URL$parse.protocol,\n hostname = _URL$parse.hostname;\n\n tags = [].concat(_toConsumableArray(tags), ['iframe[src^=\"' + protocol + '//' + hostname + '\"]']);\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return $;\n}\n\nfunction stripJunkTags(article, $) {\n var tags = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];\n\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not('.' + KEEP_CLASS).remove();\n\n // Remove the mercury-parser-keep class from result\n $('.' + KEEP_CLASS, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there's less than 3 of them (<3),\n// strip them. 
Otherwise, turn 'em into H2s.\n\nfunction cleanHOnes$$1(article, $) {\n var $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each(function (index, node) {\n return $(node).remove();\n });\n } else {\n $hOnes.each(function (index, node) {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each(function (index, node) {\n node.attribs = _Reflect$ownKeys(node.attribs).reduce(function (acc, attr) {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return _extends({}, acc, _defineProperty({}, attr, node.attribs[attr]));\n }\n\n return acc;\n }, {});\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nfunction cleanAttributes($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist($article.parent().length ? $article.parent() : $article);\n}\n\nfunction removeEmpty($article, $) {\n $article.find('p').each(function (index, p) {\n var $p = $(p);\n if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nvar UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.\n'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box\n'presence_control_external', // lifehacker.com container full of false positives\n'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nvar UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form\n'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nvar DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nvar NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];\n\nvar NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nvar HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];\n\nvar PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];\nvar PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday\n'\\\\Bcopy'];\n\n// The above list, joined into a matching regular expression\nvar POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nvar READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off\n'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright\n'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk\n'promo', 'pr_', // autoblog - press release\n'related', 'respond', 'roundcontent', // lifehacker restricted content warning\n'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];\n// The above list, joined into a matching regular expression\nvar NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');\n\n// Match a digit. Pretty clear.\n\n\n// Match 2 or more consecutive tags\n\n\n// Match 1 BR tag.\n\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\n\n\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nvar candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');\n\n\nvar candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');\n\n\n\n\nvar PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');\nvar CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nvar BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');\n\n// Get the score of a node based on its className and id.\nfunction getWeight(node) {\n var classes = node.attr('class');\n var id = node.attr('id');\n var score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE$1.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE$1.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE$1.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE$1.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE$1.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET$1.test(classes)) {\n score += 25;\n }\n }\n\n return score;\n}\n\n// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nfunction getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n\n// return 1 for every comma in text\nfunction scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n\nvar idkRe = new RegExp('^(p|pre)$', 'i');\n\nfunction scoreLength(textLength) {\n var tagName = 
arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'p';\n\n var chunks = textLength / 50;\n\n if (chunks > 0) {\n var lengthBonus = void 0;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nfunction scoreParagraph$$1(node) {\n var score = 1;\n var text = node.text().trim();\n var textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n\nfunction setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n\nfunction addScore$$1($node, $, amount) {\n try {\n var score = getOrInitScore$$1($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n\n// Adds 1/4 of a child's score to its parent\nfunction addToParent$$1(node, $, score) {\n var parent = node.parent();\n if (parent) {\n addScore$$1(parent, $, score * 0.25);\n }\n\n return node;\n}\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nfunction getOrInitScore$$1($node, $) {\n var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;\n\n var score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode$$1($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent$$1($node, $, score);\n\n return score;\n}\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nfunction scoreNode$$1($node) {\n var _$node$get = $node.get(0),\n tagName = _$node$get.tagName;\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n\n\n if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {\n return scoreParagraph$$1($node);\n } else if (tagName === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {\n return 3;\n } else if (BAD_TAGS$1.test(tagName)) {\n return -3;\n } else if (tagName === 'th') {\n return -5;\n }\n\n return 0;\n}\n\nfunction convertSpans$1($node, $) {\n if ($node.get(0)) {\n var _$node$get = $node.get(0),\n tagName = _$node$get.tagName;\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans$1($node, $);\n addScore$$1($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each(function (index, node) {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n var $node = $(node);\n $node = setScore($node, $, getOrInitScore$$1($node, $, weightNodes));\n\n var $parent = $node.parent();\n var rawScore = scoreNode$$1($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. Parents get the full value of their children's\n// content score, grandparents half\nfunction scoreContent$$1($) {\n var weightNodes = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : true;\n\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {\n var _ref2 = _slicedToArray(_ref, 2),\n parentSelector = _ref2[0],\n childSelector = _ref2[1];\n\n $(parentSelector + ' ' + childSelector).each(function (index, node) {\n addScore$$1($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n\nvar NORMALIZE_RE = /\\s{2,}/g;\n\nfunction normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n\n// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nfunction extractFromUrl(url, regexList) {\n var matchRe = regexList.find(function (re) {\n return re.test(url);\n });\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n\n// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nvar PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nvar HAS_ALPHA_RE = /[a-z]/i;\n\nvar IS_ALPHA_RE = /^[a-z]+$/i;\nvar IS_DIGIT_RE = /^[0-9]+$/i;\n\nfunction pageNumFromUrl(url) {\n var matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n var pageNum = parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? 
pageNum : null;\n}\n\nfunction removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n var goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. Useful for comparing to other links\n// that might have pagination data within them.\nfunction articleBaseUrl(url, parsed) {\n var parsedUrl = parsed || URL.parse(url);\n var protocol = parsedUrl.protocol,\n host = parsedUrl.host,\n path = parsedUrl.path;\n\n\n var firstSegmentHasLetters = false;\n var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {\n var segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n var _segment$split = segment.split('.'),\n _segment$split2 = _slicedToArray(_segment$split, 2),\n possibleSegment = _segment$split2[0],\n fileExt = _segment$split2[1];\n\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. 
The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return protocol + '//' + host + cleanedSegments.reverse().join('/');\n}\n\n// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nvar SENTENCE_END_RE = new RegExp('.( |$)');\nfunction hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n\nfunction excerptContent(content) {\n var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;\n\n return content.trim().split(/\\s+/).slice(0, words).join(' ');\n}\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) 
// Example:
// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
//
// Collects the top candidate, plus any sibling that scores well enough
// (or is a paragraph that reads like content), into a fresh wrapper div.
//
// $candidate: cheerio selection for the top-scoring node
// topScore:   the score of $candidate
// $:          the cheerio instance the nodes belong to
// Returns $candidate itself when it has no parent, else the wrapper div.
function mergeSiblings($candidate, topScore, $) {
  if (!$candidate.parent().length) {
    return $candidate;
  }

  var siblingScoreThreshold = Math.max(10, topScore * 0.25);
  // The wrapper must be a real element; an empty selection would silently
  // discard everything appended to it.
  var wrappingDiv = $('<div></div>');
  // Compare underlying DOM nodes: two cheerio wrappers around the same
  // node are never `===` to each other.
  var candidateNode = $candidate.get(0);

  $candidate.parent().children().each(function (index, sibling) {
    var $sibling = $(sibling);
    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
      return null;
    }

    var siblingScore = getScore($sibling);
    if (siblingScore) {
      if (sibling === candidateNode) {
        wrappingDiv.append($sibling);
      } else {
        var contentBonus = 0;
        var density = linkDensity($sibling);

        // If sibling has a very low link density,
        // give it a small bonus
        if (density < 0.05) {
          contentBonus += 20;
        }

        // If sibling has a high link density,
        // give it a penalty
        if (density >= 0.5) {
          contentBonus -= 20;
        }

        // If sibling node has the same class as
        // candidate, give it a bonus
        if ($sibling.attr('class') === $candidate.attr('class')) {
          contentBonus += topScore * 0.2;
        }

        var newScore = siblingScore + contentBonus;

        if (newScore >= siblingScoreThreshold) {
          return wrappingDiv.append($sibling);
        } else if (sibling.tagName === 'p') {
          var siblingContent = $sibling.text();
          var siblingContentLength = textLength(siblingContent);

          if (siblingContentLength > 80 && density < 0.25) {
            return wrappingDiv.append($sibling);
          } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
            return wrappingDiv.append($sibling);
          }
        }
      }
    }

    return null;
  });

  return wrappingDiv;
}

// After we've calculated scores, loop through all of the possible
// candidate nodes we found and find the one with the highest score.
//
// Returns the merged top candidate, or, when nothing scored above zero,
// the body (falling back to the first element if there is no body).
function findTopCandidate$$1($) {
  var $candidate = void 0;
  var topScore = 0;

  $('[score]').each(function (index, node) {
    // Ignore tags like BR, HR, etc
    if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
      return;
    }

    var $node = $(node);
    var score = getScore($node);

    if (score > topScore) {
      topScore = score;
      $candidate = $node;
    }
  });

  // If we don't have a candidate, return the body
  // or whatever the first element is
  if (!$candidate) {
    // A selection object is always truthy, so test its length instead
    // of relying on `||` to reach the fallback.
    var $body = $('body');
    return $body.length > 0 ? $body : $('*').first();
  }

  $candidate = mergeSiblings($candidate, topScore, $);

  return $candidate;
}

// Scoring

// Removes $node from the document when it does not look like content.
// Only nodes whose text scores below 10 on scoreCommas are examined;
// `weight` is the node's score and selects which link-density limit
// applies.
function removeUnlessContent($node, $, weight) {
  // Explicitly save entry-content-asset tags, which are
  // noted as valuable in the Publisher guidelines. For now
  // this works everywhere. We may want to consider making
  // this less of a sure-thing later.
  if ($node.hasClass('entry-content-asset')) {
    return;
  }

  var content = normalizeSpaces($node.text());

  if (scoreCommas(content) < 10) {
    var pCount = $('p', $node).length;
    var inputCount = $('input', $node).length;

    // Looks like a form, too many inputs.
    if (inputCount > pCount / 3) {
      $node.remove();
      return;
    }

    var contentLength = content.length;
    var imgCount = $('img', $node).length;

    // Content is too short, and there are no images, so
    // this is probably junk content.
    if (contentLength < 25 && imgCount === 0) {
      $node.remove();
      return;
    }

    var density = linkDensity($node);

    // Too high of link density, is probably a menu or
    // something similar.
    // console.log(weight, density, contentLength)
    if (weight < 25 && density > 0.2 && contentLength > 75) {
      $node.remove();
      return;
    }

    // Too high of a link density, despite the score being
    // high.
    if (weight >= 25 && density > 0.5) {
      // Don't remove the node if it's a list and the
      // previous sibling ends with a colon though. That
      // means it's probably content.
      var tagName = $node.get(0).tagName;
      var nodeIsList = tagName === 'ol' || tagName === 'ul';
      if (nodeIsList) {
        var previousNode = $node.prev();
        if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
          return;
        }
      }

      $node.remove();
      return;
    }

    var scriptCount = $('script', $node).length;

    // Too many script tags, not enough content.
    if (scriptCount > 0 && contentLength < 150) {
      $node.remove();
      return;
    }
  }
}

// Given an article, clean it of some superfluous content specified by
// tags. Things like forms, ads, etc.
//
// Tags is an array of tag name's to search through. (like div, form,
// etc)
//
// Return this same doc.
function cleanTags$$1($article, $) {
  $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
    var $node = $(node);
    var weight = getScore($node);
    if (!weight) {
      weight = getOrInitScore$$1($node, $);
      setScore($node, $, weight);
    }

    // drop node if its weight is < 0
    if (weight < 0) {
      $node.remove();
    } else {
      // determine if node seems like content
      removeUnlessContent($node, $, weight);
    }
  });

  return $;
}

// Removes headers inside $article that precede every paragraph, that
// repeat the title (third argument, defaults to ''), or that have a
// negative weight. Returns $.
function cleanHeaders($article, $) {
  var title = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : '';

  $(HEADER_TAG_LIST, $article).each(function (index, header) {
    var $header = $(header);
    // Remove any headers that appear before all other p tags in the
    // document. This probably means that it was part of the title, a
    // subtitle or something else extraneous like a datestamp or byline,
    // all of which should be handled by other metadata handling.
    if ($($header, $article).prevAll('p').length === 0) {
      return $header.remove();
    }

    // Remove any headers that match the title exactly.
    if (normalizeSpaces($(header).text()) === title) {
      return $header.remove();
    }

    // If this header has a negative weight, it's probably junk.
    // Get rid of it.
    if (getWeight($(header)) < 0) {
      return $header.remove();
    }

    return $header;
  });

  return $;
}

// Rewrite the tag name to div if it's a top level node like body or
// html to avoid later complications with multiple body tags.

function rewriteTopLevel$$1(article, $) {
  // I'm not using context here because
  // it's problematic when converting the
  // top-level/root node - AP
  $ = convertNodeTo($('html'), $, 'div');
  $ = convertNodeTo($('body'), $, 'div');

  return $;
}

// Resolves the `attr` attribute of every node under $content that has
// one against rootUrl, overwriting the attribute in place.
function absolutize($, rootUrl, attr, $content) {
  $('[' + attr + ']', $content).each(function (_, node) {
    var url = node.attribs[attr];
    var absoluteUrl = URL.resolve(rootUrl, url);

    node.attribs[attr] = absoluteUrl;
  });
}

// Makes every href and src under $content absolute relative to url.
// Returns $content.
function makeLinksAbsolute($content, $, url) {
  ['href', 'src'].forEach(function (attr) {
    return absolutize($, url, attr, $content);
  });

  return $content;
}

// Length of text after trimming and collapsing whitespace runs to a
// single space.
function textLength(text) {
  return text.trim().replace(/\s+/g, ' ').length;
}

// Determines what percentage of the text
// in a node is link text
// Takes a node, returns a float
function linkDensity($node) {
  var totalTextLength = textLength($node.text());

  var linkText = $node.find('a').text();
  var linkLength = textLength(linkText);

  if (totalTextLength > 0) {
    return linkLength / totalTextLength;
  } else if (totalTextLength === 0 && linkLength > 0) {
    return 1;
  }

  return 0;
}

// Given a node type to search for, and a list of meta tag names to
// search for, find a meta tag associated.
//
// $:           cheerio instance for the document
// metaNames:   ordered list of meta tag names to try
// cachedNames: meta names known to be present in the document
// 4th arg:     when true (the default) the value is run through stripTags
// Returns the first unambiguous value found, or null.
function extractFromMeta$$1($, metaNames, cachedNames) {
  var cleanTags$$1 = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;

  var foundNames = metaNames.filter(function (name) {
    return cachedNames.indexOf(name) !== -1;
  });

  var type = 'name';
  var value = 'value';

  for (var i = 0; i < foundNames.length; i += 1) {
    var nodes = $('meta[' + type + '="' + foundNames[i] + '"]');

    // Get the unique value of every matching node, in case there
    // are two meta tags with the same name and value.
    // Remove empty values.
    var values = nodes.map(function (index, node) {
      return $(node).attr(value);
    }).toArray().filter(function (text, index, all) {
      return text !== undefined && text !== '' && all.indexOf(text) === index;
    });

    // If we have more than one value for the same name, we have a
    // conflict and can't trust any of them. Skip this name. If we have
    // zero, that means our meta tags had no values. Skip this name
    // also.
    if (values.length === 1) {
      // Meta values that contain HTML should be stripped, as they
      // weren't subject to cleaning previously.
      return cleanTags$$1 ? stripTags(values[0], $) : values[0];
    }
  }

  // If nothing is found, return null
  return null;
}

// True when $node has at most maxChildren children and is not nested
// inside something that looks like a comment.
function isGoodNode($node, maxChildren) {
  // If it has a number of children, it's more likely a container
  // element. Skip it.
  if ($node.children().length > maxChildren) {
    return false;
  }
  // If it looks to be within a comment, skip it.
  if (withinComment($node)) {
    return false;
  }

  return true;
}

// Given a a list of selectors find content that may
// be extractable from the document. This is for flat
// meta-information, like author, title, date published, etc.
//
// 3rd arg (maxChildren, default 1) and 4th arg (textOnly, default true)
// are optional. Returns the node's text (or html when textOnly is false)
// for the first selector that matches exactly one good node, else null.
function extractFromSelectors$$1($, selectors) {
  var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
  var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;

  for (var i = 0; i < selectors.length; i += 1) {
    var nodes = $(selectors[i]);

    // If we didn't get exactly one of this selector, this may be
    // a list of articles or comments. Skip it.
    if (nodes.length === 1) {
      var $node = $(nodes[0]);

      if (isGoodNode($node, maxChildren)) {
        var content = textOnly ? $node.text() : $node.html();

        if (content) {
          return content;
        }
      }
    }
  }

  return null;
}

// strips all tags from a string of text
function stripTags(text, $) {
  // Wrapping text in html element prevents errors when text
  // has no html
  var cleanText = $('<span>' + text + '</span>').text();
  return cleanText === '' ? text : cleanText;
}

// True when any ancestor of $node has "comment" in its class or id.
function withinComment($node) {
  var parents = $node.parents().toArray();
  var commentParent = parents.find(function (parent) {
    var classAndId = parent.attribs.class + ' ' + parent.attribs.id;
    return classAndId.includes('comment');
  });

  return commentParent !== undefined;
}

// Given a node, determine if it's article-like enough to return
// param: node (a cheerio node)
// return: boolean

function nodeIsSufficient($node) {
  return $node.text().trim().length >= 100;
}

// True when the document contains a node matching IS_WP_SELECTOR.
function isWordpress($) {
  return $(IS_WP_SELECTOR).length > 0;
}

// DOM manipulation

// CLEAN AUTHOR CONSTANTS
var CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',

// CLEAN DEK CONSTANTS
var TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
// An ordered list of meta tag names that denote likely article deks.
// From most distinct to least distinct.
//
// NOTE: There are currently no meta tags that seem to provide the right
// content consistently enough.
Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\n\n\n// An ordered list of Selectors to find likely article deks. From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\n\n\n// CLEAN DATE PUBLISHED CONSTANTS\nvar MS_DATE_STRING = /^\\d{13}$/i;\nvar SEC_DATE_STRING = /^\\d{10}$/i;\nvar CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nvar TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nvar TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nvar months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];\nvar allMonths = months.join('|');\nvar timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nvar timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nvar SPLIT_DATE_STRING = new RegExp('(' + timestamp1 + ')|(' + timestamp2 + ')|([0-9]{1,4})|(' + allMonths + ')', 'ig');\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nvar TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nvar DOMAIN_ENDINGS_RE = new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nfunction cleanAuthor(author) {\n return author.replace(CLEAN_AUTHOR_RE, '$2').trim();\n}\n\nfunction clean$1(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nfunction cleanDek(dek, _ref) {\n var $ = _ref.$,\n excerpt = _ref.excerpt;\n\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 
5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10)) return null;\n\n var dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return dekText.trim();\n}\n\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nfunction cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1').trim();\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nfunction cleanDatePublished(dateString) {\n // If string is in milliseconds or seconds, convert to int\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n dateString = parseInt(dateString, 10);\n }\n\n var date = moment(new Date(dateString));\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = moment(new Date(dateString));\n }\n\n return date.isValid() ? date.toISOString() : null;\n}\n\n// Clean our article content, returning a new, cleaned node.\n\nfunction extractCleanNode(article, _ref) {\n var $ = _ref.$,\n _ref$cleanConditional = _ref.cleanConditionally,\n cleanConditionally = _ref$cleanConditional === undefined ? true : _ref$cleanConditional,\n _ref$title = _ref.title,\n title = _ref$title === undefined ? '' : _ref$title,\n _ref$url = _ref.url,\n url = _ref$url === undefined ? '' : _ref$url,\n _ref$defaultCleaner = _ref.defaultCleaner,\n defaultCleaner = _ref$defaultCleaner === undefined ? 
true : _ref$defaultCleaner;\n\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel$$1(article, $);\n\n // Drop small images and spacer images\n // Only do this is defaultCleaner is set to true;\n // this can sometimes be too aggressive.\n if (defaultCleaner) cleanImages(article, $);\n\n // Mark elements to keep that would normally be removed.\n // E.g., stripJunkTags will remove iframes, so we're going to mark\n // YouTube/Vimeo videos as elements we want to keep.\n markToKeep(article, $, url);\n\n // Drop certain tags like , etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. Otherwise, turn 'em into H2s.\n cleanHOnes$$1(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // Make links absolute\n makeLinksAbsolute(article, $, url);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags$$1(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes(article, $);\n\n return article;\n}\n\nfunction cleanTitle$$1(title, _ref) {\n var url = _ref.url,\n $ = _ref.$;\n\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n var h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return stripTags(title, $).trim();\n}\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n var _ret = function () {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n var termCounts = splitTitle.reduce(function (acc, titleText) {\n acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n var _Reflect$ownKeys$redu = _Reflect$ownKeys(termCounts).reduce(function (acc, key) {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]),\n _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),\n maxTerm = _Reflect$ownKeys$redu2[0],\n termCount = _Reflect$ownKeys$redu2[1];\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. 
Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n\n\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n var splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n var longestEnd = splitEnds.reduce(function (acc, end) {\n return acc.length > end.length ? acc : end;\n }, '');\n\n if (longestEnd.length > 10) {\n return {\n v: longestEnd\n };\n }\n\n return {\n v: text\n };\n }();\n\n if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === \"object\") return _ret.v;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. Not the end of the world if it doesn't strip right.\n var _URL$parse = URL.parse(url),\n host = _URL$parse.host;\n\n var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n var startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nfunction resolveSplitTitle(title) {\n var url = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : '';\n\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n var splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n var newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n\nvar Cleaners = {\n author: cleanAuthor,\n lead_image_url: clean$1,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: extractCleanNode,\n title: cleanTitle$$1\n};\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nfunction extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? – AP\n // let $root = $.root().clone()\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs$$1($);\n $ = scoreContent$$1($, opts.weightNodes);\n var $topCandidate = findTopCandidate$$1($);\n\n return $topCandidate;\n}\n\nvar GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. 
If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. Things like forms, ads, etc.\n extract: function extract(_ref, opts) {\n var $ = _ref.$,\n html = _ref.html,\n title = _ref.title,\n url = _ref.url;\n\n opts = _extends({}, this.defaultOpts, opts);\n\n $ = $ || cheerio.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n var node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(_Reflect$ownKeys(opts).filter(function (k) {\n return opts[k] === true;\n })), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var key = _step.value;\n\n opts[key] = false;\n $ = cheerio.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return this.cleanAndReturnNode(node, $);\n },\n\n\n // Get 
node given current options\n getContentNode: function getContentNode($, title, url, opts) {\n return extractCleanNode(extractBestNode($, opts), {\n $: $,\n cleanConditionally: opts.cleanConditionally,\n title: title,\n url: url\n });\n },\n\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode: function cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n\n // if return_type == \"html\":\n // return normalize_spaces(node_to_html(node))\n // else:\n // return node\n }\n};\n\n// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\nvar STRONG_TITLE_META_TAGS = ['tweetmeme-title', 'dc.title', 'rbtitle', 'headline', 'title'];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nvar WEAK_TITLE_META_TAGS = ['og:title'];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
// The speed tradeoff is worth the accuracy hit.
var STRONG_TITLE_SELECTORS = ['.hentry .entry-title', 'h1#articleHeader', 'h1.articleHeader', 'h1.article', '.instapaper_title', '#meebo-title'];

// NOTE(review): 'post post-title' is a pair of type selectors, not class
// selectors like its neighbours - confirm whether '.post .post-title'
// was intended.
var WEAK_TITLE_SELECTORS = ['article h1', '#entry-title', '.entry-title', '#entryTitle', '#entrytitle', '.entryTitle', '.entrytitle', '#articleTitle', '.articleTitle', 'post post-title', 'h1.title', 'h2.article', 'h1', 'html head title', 'title'];

var GenericTitleExtractor = {
  // Tries four title sources from most to least trustworthy and returns
  // the cleaned title from the first one that yields anything; returns
  // an empty string when none do.
  extract: function extract(_ref) {
    var $ = _ref.$,
        url = _ref.url,
        metaCache = _ref.metaCache;

    var sources = [
      // First, a meta tag strongly associated with the headline.
      function () {
        return extractFromMeta$$1($, STRONG_TITLE_META_TAGS, metaCache);
      },
      // Second, content selectors strongly associated with the headline.
      function () {
        return extractFromSelectors$$1($, STRONG_TITLE_SELECTORS);
      },
      // Third, weaker meta tags that may match.
      function () {
        return extractFromMeta$$1($, WEAK_TITLE_META_TAGS, metaCache);
      },
      // Last, weaker selector tags that may match.
      function () {
        return extractFromSelectors$$1($, WEAK_TITLE_SELECTORS);
      }
    ];

    for (var i = 0; i < sources.length; i += 1) {
      var candidate = sources[i]();
      if (candidate) {
        return cleanTitle$$1(candidate, { url: url, $: $ });
      }
    }

    // If no matches, return an empty string
    return '';
  }
};

// An ordered list of meta tag names that denote likely article authors. All
// attributes should be lowercase for faster case-insensitive matching.
// From most distinct to least distinct.
//
// Note: "author" is too often the -developer- of the page, so it is not
// added here.
var AUTHOR_META_TAGS = ['byl', 'clmst', 'dc.author', 'dcsext.author', 'dc.creator', 'rbauthors', 'authors'];

var AUTHOR_MAX_LENGTH = 300;

// An ordered list of XPath Selectors to find likely article authors. From
// most explicit to least explicit.
//
// Note - this does not use classes like CSS. This checks to see if the string
// exists in the className, which is not as accurate as .className (which
// splits on spaces/endlines), but for our purposes it's close enough. The
// speed tradeoff is worth the accuracy hit.
var AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .vcard .fn', '.byline.vcard .fn', '.byline .vcard .fn', '.byline .by .author', '.byline .by', '.byline .author', '.post-author.vcard', '.post-author .vcard', 'a[rel=author]', '#by_author', '.by_author', '#entryAuthor', '.entryAuthor', '.byline a[href*=author]', '#author .authorname', '.author .authorname', '#author', '.author', '.articleauthor', '.ArticleAuthor', '.byline'];

// An ordered list of Selectors to find likely article authors, with
// regular expression for content.
var bylineRe = /^[\n\s]*By/i;
var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];

var GenericAuthorExtractor = {
  // Looks for an author in meta tags, then in known selectors, then in
  // byline nodes whose text starts with "By". Returns the cleaned author
  // string, or null when nothing usable is found.
  extract: function extract(_ref) {
    var $ = _ref.$,
        metaCache = _ref.metaCache;

    // First, check to see if we have a matching
    // meta tag that we can make use of.
    var fromMeta = extractFromMeta$$1($, AUTHOR_META_TAGS, metaCache);
    if (fromMeta && fromMeta.length < AUTHOR_MAX_LENGTH) {
      return cleanAuthor(fromMeta);
    }

    // Second, look through our selectors looking for potential authors.
    var fromSelectors = extractFromSelectors$$1($, AUTHOR_SELECTORS, 2);
    if (fromSelectors && fromSelectors.length < AUTHOR_MAX_LENGTH) {
      return cleanAuthor(fromSelectors);
    }

    // Last, use our looser regular-expression based selectors for
    // potential authors.
    for (var i = 0; i < BYLINE_SELECTORS_RE.length; i += 1) {
      var selector = BYLINE_SELECTORS_RE[i][0];
      var regex = BYLINE_SELECTORS_RE[i][1];
      var $match = $(selector);

      if ($match.length !== 1) {
        continue;
      }

      var bylineText = $match.text();
      if (regex.test(bylineText)) {
        return cleanAuthor(bylineText);
      }
    }

    return null;
  }
};

// An ordered list of meta tag names that denote
// likely date published dates. All attributes
// should be lowercase for faster case-insensitive matching.
// From most distinct to least distinct.
var DATE_PUBLISHED_META_TAGS = ['article:published_time', 'displaydate', 'dc.date', 'dc.date.issued', 'rbpubdate', 'publish_date', 'pub_date', 'pagedate', 'pubdate', 'revision_date', 'doc_date', 'date_created', 'content_create_date', 'lastmodified', 'created', 'date'];

// An ordered list of XPath Selectors to find
// likely date published dates. From most explicit
// to least explicit.
var DATE_PUBLISHED_SELECTORS = ['.hentry .dtstamp.published', '.hentry .published', '.hentry .dtstamp.updated', '.hentry .updated', '.single .published', '.meta .published', '.meta .postDate', '.entry-date', '.byline .date', '.postmetadata .date', '.article_datetime', '.date-header', '.story-date', '.dateStamp', '#story .datetime', '.dateline', '.pubdate'];

// An ordered list of compiled regular expressions to find likely date
// published dates from the URL.
// These should always have the first
// reference be a date string that is parseable by dateutil.parser.parse
var abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';
var DATE_PUBLISHED_URL_RES = [
  // /2012/01/27/ but not /2012/01/293
  new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),
  // 20120127 or 20120127T but not 2012012733 or 8201201733
  // /[^0-9](20\d{2}[01]\d[0-3]\d)([^0-9]|$)/i,
  // 2012-01-27
  new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),
  // /2012/jan/27/
  new RegExp('/(20\\d{2}/' + abbrevMonthsStr + '/[0-3]\\d)/', 'i')
];

var GenericDatePublishedExtractor = {
  // Finds a published date in meta tags, then selectors, then the URL,
  // and returns it cleaned; returns null when no source yields a value.
  extract: function extract(_ref) {
    var $ = _ref.$,
        url = _ref.url,
        metaCache = _ref.metaCache;

    // Sources are tried lazily, in order of trust:
    //  1. a matching meta tag (tags are not stripped from this string),
    //  2. our date_published selectors,
    //  3. a dately string in the URL.
    var rawDate =
      extractFromMeta$$1($, DATE_PUBLISHED_META_TAGS, metaCache, false) ||
      extractFromSelectors$$1($, DATE_PUBLISHED_SELECTORS) ||
      extractFromUrl(url, DATE_PUBLISHED_URL_RES);

    return rawDate ? cleanDatePublished(rawDate) : null;
  }
};

// import {
//   DEK_META_TAGS,
//   DEK_SELECTORS,
//   DEK_URL_RES,
// } from './constants';

// import { cleanDek } from 'cleaners';

// import {
//   extractFromMeta,
//   extractFromSelectors,
// } from 'utils/dom';

// Currently there is only one selector for
// deks.
// We should simply return null here
// until we have a more robust generic option.
// Below is the original source for this, for reference.
var GenericDekExtractor = {
  // extract({ $, content, metaCache }) {
  extract: function extract() {
    return null;
  }
};



// def extract_dek(self):
//     # First, check to see if we have a matching meta tag that we can make
//     # use of.
//     dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)
//     if not dek:
//         # Second, look through our CSS/XPath selectors. This may return
//         # an HTML fragment.
//         dek = self.extract_from_selectors('dek',
//                                            constants.DEK_SELECTORS,
//                                            text_only=False)
//
//     if dek:
//         # Make sure our dek isn't in the first few thousand characters
//         # of the content, otherwise it's just the start of the article
//         # and not a true dek.
//         content = self.extract_content()
//         content_chunk = normalize_spaces(strip_tags(content[:2000]))
//         dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.
//
//         # 80% or greater similarity means the dek was very similar to some
//         # of the starting content, so we skip it.
//         if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:
//             return dek
//
//     return None

// An ordered list of meta tag names that denote likely article leading images.
// All attributes should be lowercase for faster case-insensitive matching.
// From most distinct to least distinct.
var LEAD_IMAGE_URL_META_TAGS = ['og:image', 'twitter:image', 'image_src'];

var LEAD_IMAGE_URL_SELECTORS = ['link[rel=image_src]'];

var POSITIVE_LEAD_IMAGE_URL_HINTS = ['upload', 'wp-content', 'large', 'photo', 'wp-image'];
var POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');

var NEGATIVE_LEAD_IMAGE_URL_HINTS = ['spacer', 'sprite', 'blank', 'throbber', 'gradient', 'tile', 'bg', 'background', 'icon', 'social', 'header', 'hdr', 'advert', 'spinner', 'loader', 'loading', 'default', 'rating', 'share', 'facebook', 'twitter', 'theme', 'promo', 'ads', 'wp-includes'];
var NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');

var GIF_RE = /\.gif(\?.*)?$/i;
var JPG_RE = /\.jpe?g(\?.*)?$/i;

// Returns "<class> <id>" for a node, using empty strings for missing
// attributes.
function getSig($node) {
  return ($node.attr('class') || '') + ' ' + ($node.attr('id') || '');
}

// Scores image urls based on a variety of heuristics.
function scoreImageUrl(url) {
  url = url.trim();
  var score = 0;

  if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
    score += 20;
  }

  if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
    score -= 20;
  }

  // TODO: We might want to consider removing this as
  // gifs are much more common/popular than they once were
  if (GIF_RE.test(url)) {
    score -= 10;
  }

  if (JPG_RE.test(url)) {
    score += 10;
  }

  // PNGs are neutral.

  return score;
}

// Alt attribute usually means non-presentational image.
function scoreAttr($img) {
  if ($img.attr('alt')) {
    return 5;
  }

  return 0;
}

// Look through our parent and grandparent for figure-like
// container elements, give a bonus if we find them
function scoreByParents($img) {
  var score = 0;
  var $figParent = $img.parents('figure').first();

  if ($figParent.length === 1) {
    score += 25;
  }

  var $parent = $img.parent();
  var $gParent = void 0;
  if ($parent.length === 1) {
    $gParent = $parent.parent();
  }

  [$parent, $gParent].forEach(function ($node) {
    // $gParent stays undefined when the image has no single parent;
    // skip it instead of dereferencing undefined.
    if ($node && PHOTO_HINTS_RE$1.test(getSig($node))) {
      score += 15;
    }
  });

  return score;
}

// Look at our immediate sibling and see if it looks like it's a
// caption.
Bonus if so.\nfunction scoreBySibling($img) {\n var score = 0;\n var $sibling = $img.next();\n var sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE$1.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nfunction scoreByDimensions($img) {\n var score = 0;\n\n var width = parseFloat($img.attr('width'));\n var height = parseFloat($img.attr('height'));\n var src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n var area = width * height;\n if (area < 5000) {\n // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nfunction scoreByPosition($imgs, index) {\n return $imgs.length / 2 - index;\n}\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nvar GenericLeadImageUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n content = _ref.content,\n metaCache = _ref.metaCache;\n\n var cleanUrl = void 0;\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n var imageUrl = extractFromMeta$$1($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);\n\n if (imageUrl) {\n cleanUrl = clean$1(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n var imgs = $('img', content).toArray();\n var imgScores = {};\n\n imgs.forEach(function (img, index) {\n var $img = $(img);\n var src = $img.attr('src');\n\n if (!src) return;\n\n var score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n var _Reflect$ownKeys$redu = _Reflect$ownKeys(imgScores).reduce(function (acc, key) {\n return imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc;\n }, [null, 0]),\n _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),\n topUrl = _Reflect$ownKeys$redu2[0],\n topScore = _Reflect$ownKeys$redu2[1];\n\n if (topScore > 0) {\n cleanUrl = clean$1(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(LEAD_IMAGE_URL_SELECTORS), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var selector = _step.value;\n\n var $node = $(selector).first();\n var src = $node.attr('src');\n if (src) {\n cleanUrl = clean$1(src);\n if (cleanUrl) return cleanUrl;\n }\n\n var href = $node.attr('href');\n if (href) {\n cleanUrl = clean$1(href);\n if (cleanUrl) return cleanUrl;\n }\n\n var value = $node.attr('value');\n if (value) {\n cleanUrl = clean$1(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return null;\n }\n};\n\n\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = 
urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. 
Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n\nfunction scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. 
Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n var diffPercent = 1.0 - similarity;\n var diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n\nfunction scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n var score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n var linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n\nfunction scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n\nvar DIGIT_RE$2 = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nvar EXTRANEOUS_LINK_HINTS$1 = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];\nvar EXTRANEOUS_LINK_HINTS_RE$1 = new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nvar NEXT_LINK_TEXT_RE$1 = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nvar CAP_LINK_TEXT_RE$1 = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nvar PREV_LINK_TEXT_RE$1 = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\n\nfunction scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE$1.test(href)) {\n return -25;\n }\n\n return 0;\n}\n\nfunction makeSig$1($link) {\n return ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');\n}\n\nfunction scoreByParents$1($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n var $parent = $link.parent();\n var positiveMatch = false;\n var negativeMatch = false;\n var score = 0;\n\n _Array$from(range(0, 4)).forEach(function () {\n if ($parent.length === 0) {\n return;\n }\n\n var parentData = makeSig$1($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. 
Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData) && EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n\nfunction scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE$1.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n\nfunction shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls) {\n // skip if we've already fetched this url\n if (previousUrls.find(function (url) {\n return href === url;\n }) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n var hostname = parsedUrl.hostname;\n\n var _URL$parse = URL.parse(href),\n linkHost = _URL$parse.hostname;\n\n // Domain mismatch.\n\n\n if (linkHost !== hostname) {\n return false;\n }\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n var fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE$2.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n\nfunction scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. 
It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n\nfunction scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE$1.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n\nfunction scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE$1.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE$1.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n\nfunction makeBaseRegex(baseUrl) {\n return new RegExp('^' + baseUrl, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return (linkText || $link.text()) + ' ' + ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');\n}\n\nfunction scoreLinks(_ref) {\n var links = _ref.links,\n articleUrl = _ref.articleUrl,\n baseUrl = _ref.baseUrl,\n parsedUrl = _ref.parsedUrl,\n $ = _ref.$,\n _ref$previousUrls = _ref.previousUrls,\n previousUrls = _ref$previousUrls === undefined ? [] : _ref$previousUrls;\n\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n var baseRegex = makeBaseRegex(baseUrl);\n var isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. 
Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n var scoredPages = links.reduce(function (possiblePages, link) {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n var href = removeAnchor(link.attribs.href);\n var $link = $(link);\n var linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText: linkText,\n href: href\n };\n } else {\n possiblePages[href].linkText = possiblePages[href].linkText + '|' + linkText;\n }\n\n var possiblePage = possiblePages[href];\n var linkData = makeSig($link, linkText);\n var pageNum = pageNumFromUrl(href);\n\n var score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents$1($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return _Reflect$ownKeys(scoredPages).length === 0 ? null : scoredPages;\n}\n\n// Looks for and returns next page url\n// for multi-page articles\nvar GenericNextPageUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n parsedUrl = _ref.parsedUrl,\n _ref$previousUrls = _ref.previousUrls,\n previousUrls = _ref$previousUrls === undefined ? 
[] : _ref$previousUrls;\n\n parsedUrl = parsedUrl || URL.parse(url);\n\n var articleUrl = removeAnchor(url);\n var baseUrl = articleBaseUrl(url, parsedUrl);\n\n var links = $('a[href]').toArray();\n\n var scoredLinks = scoreLinks({\n links: links,\n articleUrl: articleUrl,\n baseUrl: baseUrl,\n parsedUrl: parsedUrl,\n $: $,\n previousUrls: previousUrls\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n var topPage = _Reflect$ownKeys(scoredLinks).reduce(function (acc, link) {\n var scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n }\n};\n\nvar CANONICAL_META_SELECTORS = ['og:url'];\n\nfunction parseDomain(url) {\n var parsedUrl = URL.parse(url);\n var hostname = parsedUrl.hostname;\n\n return hostname;\n}\n\nfunction result(url) {\n return {\n url: url,\n domain: parseDomain(url)\n };\n}\n\nvar GenericUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n metaCache = _ref.metaCache;\n\n var $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n var href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n var metaUrl = extractFromMeta$$1($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n }\n};\n\nvar EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];\n\nfunction clean$2(content, $) {\n var maxLength = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : 200;\n\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nvar GenericExcerptExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n content = _ref.content,\n metaCache = _ref.metaCache;\n\n var excerpt = extractFromMeta$$1($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean$2(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n var maxLength = 200;\n var shortContent = content.slice(0, maxLength * 5);\n return clean$2($(shortContent).text(), $, maxLength);\n }\n};\n\nvar GenericWordCountExtractor = {\n extract: function extract(_ref) {\n var content = _ref.content;\n\n var $ = cheerio.load(content);\n\n var text = normalizeSpaces($('div').first().text());\n return text.split(/\\s/).length;\n }\n};\n\nvar GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: function direction(_ref) {\n var title = _ref.title;\n return stringDirection.getDirection(title);\n },\n\n extract: function extract(options) {\n var html = options.html;\n\n\n if (html) {\n var $ = cheerio.load(html);\n options.$ = $;\n }\n\n var title = this.title(options);\n var date_published = this.date_published(options);\n var author = this.author(options);\n var content = this.content(_extends({}, options, { title: title }));\n var lead_image_url = this.lead_image_url(_extends({}, options, { content: content }));\n var dek 
= this.dek(_extends({}, options, { content: content }));\n var next_page_url = this.next_page_url(options);\n var excerpt = this.excerpt(_extends({}, options, { content: content }));\n var word_count = this.word_count(_extends({}, options, { content: content }));\n var direction = this.direction({ title: title });\n\n var _url_and_domain = this.url_and_domain(options),\n url = _url_and_domain.url,\n domain = _url_and_domain.domain;\n\n return {\n title: title,\n author: author,\n date_published: date_published || null,\n dek: dek,\n lead_image_url: lead_image_url,\n content: content,\n next_page_url: next_page_url,\n url: url,\n domain: domain,\n excerpt: excerpt,\n word_count: word_count,\n direction: direction\n };\n }\n};\n\nfunction getExtractor(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(url);\n var _parsedUrl = parsedUrl,\n hostname = _parsedUrl.hostname;\n\n var baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;\n}\n\n// Remove elements by an array of selectors\nfunction cleanBySelectors($content, $, _ref) {\n var clean = _ref.clean;\n\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nfunction transformElements($content, $, _ref2) {\n var transforms = _ref2.transforms;\n\n if (!transforms) return $content;\n\n _Reflect$ownKeys(transforms).forEach(function (key) {\n var $matches = $(key, $content);\n var value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each(function (index, node) {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each(function (index, node) {\n var result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo($(node), 
$, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors) {\n return selectors.find(function (selector) {\n if (Array.isArray(selector)) {\n var _selector = _slicedToArray(selector, 2),\n s = _selector[0],\n attr = _selector[1];\n\n return $(s).length === 1 && $(s).attr(attr) && $(s).attr(attr).trim() !== '';\n }\n\n return $(selector).length === 1 && $(selector).text().trim() !== '';\n });\n}\n\nfunction select(opts) {\n var $ = opts.$,\n type = opts.type,\n extractionOpts = opts.extractionOpts,\n _opts$extractHtml = opts.extractHtml,\n extractHtml = _opts$extractHtml === undefined ? false : _opts$extractHtml;\n // Skip if there's not extraction for this type\n\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n var selectors = extractionOpts.selectors,\n _extractionOpts$defau = extractionOpts.defaultCleaner,\n defaultCleaner = _extractionOpts$defau === undefined ? 
true : _extractionOpts$defau;\n\n\n var matchingSelector = findMatchingSelector($, selectors);\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n if (extractHtml) {\n var $content = $(matchingSelector);\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n $content = Cleaners[type]($content, _extends({}, opts, { defaultCleaner: defaultCleaner }));\n\n return $.html($content);\n }\n\n var result = void 0;\n\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n var _matchingSelector = _slicedToArray(matchingSelector, 2),\n selector = _matchingSelector[0],\n attr = _matchingSelector[1];\n\n result = $(selector).attr(attr).trim();\n } else {\n result = $(matchingSelector).text().trim();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return Cleaners[type](result, opts);\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n var type = opts.type,\n extractor = opts.extractor,\n _opts$fallback = opts.fallback,\n fallback = _opts$fallback === undefined ? 
true : _opts$fallback;\n\n\n var result = select(_extends({}, opts, { extractionOpts: extractor[type] }));\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nvar RootExtractor = {\n extract: function extract() {\n var extractor = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : GenericExtractor;\n var opts = arguments[1];\n var _opts = opts,\n contentOnly = _opts.contentOnly,\n extractedTitle = _opts.extractedTitle;\n // This is the generic extractor. Run its extract method\n\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = _extends({}, opts, {\n extractor: extractor\n });\n\n if (contentOnly) {\n var _content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: extractedTitle\n }));\n return {\n content: _content\n };\n }\n var title = extractResult(_extends({}, opts, { type: 'title' }));\n var date_published = extractResult(_extends({}, opts, { type: 'date_published' }));\n var author = extractResult(_extends({}, opts, { type: 'author' }));\n var next_page_url = extractResult(_extends({}, opts, { type: 'next_page_url' }));\n var content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: title\n }));\n var lead_image_url = extractResult(_extends({}, opts, { type: 'lead_image_url', content: content }));\n var excerpt = extractResult(_extends({}, opts, { type: 'excerpt', content: content }));\n var dek = extractResult(_extends({}, opts, { type: 'dek', content: content, excerpt: excerpt }));\n var word_count = extractResult(_extends({}, opts, { type: 'word_count', content: content }));\n var direction = extractResult(_extends({}, opts, { type: 'direction', title: title }));\n\n var _ref3 = extractResult(_extends({}, opts, { type: 'url_and_domain' })) || { url: 
null, domain: null },\n url = _ref3.url,\n domain = _ref3.domain;\n\n return {\n title: title,\n content: content,\n author: author,\n date_published: date_published,\n lead_image_url: lead_image_url,\n dek: dek,\n next_page_url: next_page_url,\n url: url,\n domain: domain,\n excerpt: excerpt,\n word_count: word_count,\n direction: direction\n };\n }\n};\n\nvar collectAllPages = (function () {\n var _ref = _asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2) {\n var next_page_url = _ref2.next_page_url,\n html = _ref2.html,\n $ = _ref2.$,\n metaCache = _ref2.metaCache,\n result = _ref2.result,\n Extractor = _ref2.Extractor,\n title = _ref2.title,\n url = _ref2.url;\n var pages, previousUrls, extractorOpts, nextPageResult, word_count;\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n // At this point, we've fetched just the first page\n pages = 1;\n previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n\n case 2:\n if (!(next_page_url && pages < 26)) {\n _context.next = 15;\n break;\n }\n\n pages += 1;\n _context.next = 6;\n return Resource.create(next_page_url);\n\n case 6:\n $ = _context.sent;\n\n html = $.html();\n\n extractorOpts = {\n url: next_page_url,\n html: html,\n $: $,\n metaCache: metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls: previousUrls\n };\n nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n\n previousUrls.push(next_page_url);\n result = _extends({}, result, {\n content: '\\n ' + result.content + '\\n \\n
' });\n return _context.abrupt('return', _extends({}, result, {\n total_pages: pages,\n pages_rendered: pages,\n word_count: word_count\n }));\n\n case 17:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, this);\n }));\n\n function collectAllPages(_x) {\n return _ref.apply(this, arguments);\n }\n\n return collectAllPages;\n})();\n\nvar Mercury = {\n parse: function parse(url, html) {\n var _this = this;\n\n var opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {\n var _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, parsedUrl, Extractor, $, metaCache, result, _result, title, next_page_url;\n\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n _opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === undefined ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === undefined ? 
true : _opts$fallback;\n parsedUrl = URL.parse(url);\n\n if (validateUrl(parsedUrl)) {\n _context.next = 4;\n break;\n }\n\n return _context.abrupt('return', Errors.badUrl);\n\n case 4:\n Extractor = getExtractor(url, parsedUrl);\n // console.log(`Using extractor for ${Extractor.domain}`);\n\n _context.next = 7;\n return Resource.create(url, html, parsedUrl);\n\n case 7:\n $ = _context.sent;\n\n if (!$.error) {\n _context.next = 10;\n break;\n }\n\n return _context.abrupt('return', $);\n\n case 10:\n\n html = $.html();\n\n // Cached value of every meta name in our document.\n // Used when extracting title/author/date_published/dek\n metaCache = $('meta').map(function (_, node) {\n return $(node).attr('name');\n }).toArray();\n result = RootExtractor.extract(Extractor, { url: url, html: html, $: $, metaCache: metaCache, parsedUrl: parsedUrl, fallback: fallback });\n _result = result, title = _result.title, next_page_url = _result.next_page_url;\n\n // Fetch more pages if next_page_url found\n\n if (!(fetchAllPages && next_page_url)) {\n _context.next = 20;\n break;\n }\n\n _context.next = 17;\n return collectAllPages({\n Extractor: Extractor,\n next_page_url: next_page_url,\n html: html,\n $: $,\n metaCache: metaCache,\n result: result,\n title: title,\n url: url\n });\n\n case 17:\n result = _context.sent;\n _context.next = 21;\n break;\n\n case 20:\n result = _extends({}, result, {\n total_pages: 1,\n rendered_pages: 1\n });\n\n case 21:\n return _context.abrupt('return', result);\n\n case 22:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, _this);\n }))();\n },\n\n\n // A convenience method for getting a resource\n // to work with, e.g., for custom extractor generator\n fetchResource: function fetchResource(url) {\n var _this2 = this;\n\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee2() {\n return _regeneratorRuntime.wrap(function _callee2$(_context2) {\n while (1) {\n switch (_context2.prev = _context2.next) {\n case 0:\n 
_context2.next = 2;\n return Resource.create(url);\n\n case 2:\n return _context2.abrupt('return', _context2.sent);\n\n case 3:\n case 'end':\n return _context2.stop();\n }\n }\n }, _callee2, _this2);\n }))();\n }\n};\n\nmodule.exports = Mercury;\n//# sourceMappingURL=mercury.js.map\n","// Spacer images to be removed\nexport const SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nexport const KEEP_CLASS = 'mercury-parser-keep';\n\nexport const KEEP_SELECTORS = [\n 'iframe[src^=\"https://www.youtube.com\"]',\n 'iframe[src^=\"http://www.youtube.com\"]',\n 'iframe[src^=\"https://player.vimeo\"]',\n 'iframe[src^=\"http://player.vimeo\"]',\n];\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nexport const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(tag => `${tag}:empty`).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. 
Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. 
Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. 
- AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const nextElement = $(element).next().get(0);\n\n if (nextElement && nextElement.tagName === 'br') {\n collapsing = true;\n $(element).remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertable = 
$span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","export default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const { attribs } = $node.get(0);\n const attribString = Reflect.ownKeys(attribs)\n .map(key => `${key}=${attribs[key]}`)\n .join(' ');\n\n $node.replaceWith(`<${tag} ${attribString}>${$node.contents()}${tag}>`);\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n 
const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not(`.${KEEP_CLASS}`).remove();\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n","import { WHITELIST_ATTRS_RE } from './constants';\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each((index, node) => {\n node.attribs = Reflect.ownKeys(node.attribs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: node.attribs[attr] };\n }\n\n return acc;\n }, {});\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ?\n $article.parent() : $article\n );\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. 
Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) 
{\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","const NORMALIZE_RE = /\\s{2,}/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n","import URL from 'url';\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n let 
goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path.split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. 
The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) 
Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling === $candidate) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n return wrappingDiv;\n}\n","// Scoring\nexport { default as getWeight } from './get-weight';\nexport { default as getScore } from './get-score';\nexport { default as scoreCommas } from './score-commas';\nexport { default as scoreLength } from './score-length';\nexport { default as scoreParagraph } from 
'./score-paragraph';\nexport { default as setScore } from './set-score';\nexport { default as addScore } from './add-score';\nexport { default as addToParent } from './add-to-parent';\nexport { default as getOrInitScore } from './get-or-init-score';\nexport { default as scoreNode } from './score-node';\nexport { default as scoreContent } from './score-content';\nexport { default as findTopCandidate } from './find-top-candidate';\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling 
starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName;\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. (like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","import URL from 'url';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const url = node.attribs[attr];\n const absoluteUrl = URL.resolve(rootUrl, url);\n\n node.attribs[attr] = absoluteUrl;\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const 
linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`${text}`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","export default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const classAndId = `${parent.attribs.class} ${parent.attribs.id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","// DOM manipulation\nexport { default as stripUnlikelyCandidates } from './strip-unlikely-candidates';\nexport { default as brsToPs } from './brs-to-ps';\nexport { default as paragraphize } from './paragraphize';\nexport { default as convertToParagraphs } from './convert-to-paragraphs';\nexport { default as convertNodeTo } from './convert-node-to';\nexport { default as cleanImages } from './clean-images';\nexport { default as markToKeep } from './mark-to-keep';\nexport { default as stripJunkTags } from './strip-junk-tags';\nexport { default as cleanHOnes } from './clean-h-ones';\nexport { default as cleanAttributes } from './clean-attributes';\nexport { default as removeEmpty } from './remove-empty';\nexport { default as cleanTags } from './clean-tags';\nexport { default as cleanHeaders } from './clean-headers';\nexport { default as rewriteTopLevel } from './rewrite-top-level';\nexport { default as makeLinksAbsolute } from './make-links-absolute';\nexport { textLength, linkDensity } from './link-density';\nexport { default as extractFromMeta } from './extract-from-meta';\nexport { default as extractFromSelectors } from './extract-from-selectors';\nexport { default as stripTags } from './strip-tags';\nexport { default as withinComment } from './within-comment';\nexport { default as nodeIsSufficient } from './node-is-sufficient';\nexport { default as isWordpress } from './is-wordpress';\n","export default function 
insertValues(strings, ...values) {\n if (values.length) {\n return strings.reduce((result, part, idx) => {\n let value = values[idx];\n\n if (value && typeof value.toString === 'function') {\n value = value.toString();\n } else {\n value = '';\n }\n\n return result + part + value;\n }, '');\n }\n\n return strings.join('');\n}\n","import insertValues from './insert-values'\n\nconst bodyPattern = /^\\n([\\s\\S]+)\\s{2}$/gm;\nconst trailingWhitespace = /\\s+$/;\n\nexport default function template(strings, ...values) {\n const compiled = insertValues(strings, ...values);\n let [body] = compiled.match(bodyPattern) || [];\n let indentLevel = /^\\s{0,4}(.+)$/g;\n\n if (!body) {\n body = compiled;\n indentLevel = /^\\s{0,2}(.+)$/g;\n }\n\n return body.split('\\n')\n .slice(1)\n .map((line) => {\n line = line.replace(indentLevel, '$1');\n\n if (trailingWhitespace.test(line)) {\n line = line.replace(trailingWhitespace, '');\n }\n\n return line;\n })\n .join('\\n');\n}\n","import template from './index';\n\nexport default function (hostname, name) {\n return template`\n export const ${name} = {\n domain: '${hostname}',\n\n title: {\n selectors: [\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n // enter author selectors\n ],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ]\n },\n }\n `;\n}\n","import template from './index';\n\nconst IGNORE = [\n 'url',\n 'domain',\n 'content',\n 'word_count',\n 'next_page_url',\n 'excerpt',\n 'direction',\n 'total_pages',\n 'rendered_pages',\n]\n\nfunction testFor(key, value, dir, file, url) {\n if (IGNORE.find(k => k === key)) return ''\n\n return template`\n it('returns the ${key}', async () => {\n // To pass this test, fill out the ${key} selector\n // in ${dir}/index.js.\n const html =\n fs.readFileSync('${file}');\n const articleUrl =\n '${url}';\n\n const { ${key} } =\n await Mercury.parse(articleUrl, html, { fallback: false });\n\n // Update these values with the expected values from\n // the article.\n assert.equal(${key}, ${value ? \"`\" + value + \"`\" : \"''\"})\n });\n `;\n}\n\nexport default function (file, url, dir, result, name) {\n return template`\n import assert from 'assert';\n import fs from 'fs';\n import URL from 'url';\n import cheerio from 'cheerio';\n\n import Mercury from 'mercury';\n import getExtractor from 'extractors/get-extractor';\n import { excerptContent } from 'utils/text';\n\n describe('${name}', () => {\n it('is selected properly', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const url =\n '${url}';\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ${Reflect.ownKeys(result).map(k => testFor(k, result[k], dir, file, url)).join('\\n\\n')}\n\n it('returns the content', async () => {\n // To pass this test, fill out the content selector\n // in ${dir}/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const html =\n fs.readFileSync('${file}');\n const url =\n 
'${url}';\n\n const { content } =\n await Mercury.parse(url, html, { fallback: false });\n\n const $ = cheerio.load(content || '');\n\n const first13 = excerptContent($('*').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, 'Add the first 13 words of the article here');\n });\n });\n `;\n}\n","import fs from 'fs'\nimport URL from 'url'\nimport inquirer from 'inquirer'\nimport ora from 'ora'\nimport { exec } from 'child_process'\n\nimport Mercury from '../dist/mercury'\nimport {\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom'\nimport extractorTemplate from './templates/custom-extractor'\nimport extractorTestTemplate from './templates/custom-extractor-test'\n\nconst questions = [\n {\n type: 'input',\n name: 'website',\n message: 'Paste a url to an article you\\'d like to create or extend a parser for:',\n validate(value) {\n const { hostname } = URL.parse(value);\n if (hostname) return true;\n\n return false;\n },\n },\n];\n\ninquirer.prompt(questions).then((answers) => {\n scaffoldCustomParser(answers.website);\n});\n\nlet spinner;\nfunction confirm(fn, args, msg, newParser) {\n spinner = ora({ text: msg });\n spinner.start();\n const result = fn.apply(null, args);\n\n if (result && result.then) {\n result.then(r => savePage(r, args, newParser));\n } else {\n spinner.succeed();\n }\n\n return result;\n}\n\nfunction savePage($, [url], newParser) {\n const { hostname } = URL.parse(url);\n\n spinner.succeed();\n\n const filename = new Date().getTime();\n const file = `./fixtures/${hostname}/${filename}.html`;\n // fix http(s) relative links:\n makeLinksAbsolute($('*').first(), $, url)\n $('[src], [href]').each((index, node) => {\n const $node = $(node)\n const link = $node.attr('src')\n if (link && link.slice(0, 2) === '//') {\n $node.attr('src', `http:${link}`)\n }\n })\n const html = stripJunkTags($('*').first(), $, ['script']).html();\n\n fs.writeFileSync(file, html);\n\n const result = 
Mercury.parse(url, html).then((result) => {\n if (newParser) {\n confirm(generateScaffold, [url, file, result], 'Generating parser and tests');\n console.log(`Your custom site extractor has been set up. To get started building it, run\n yarn watch:test -- ${hostname}\n -- OR --\n npm run watch:test -- ${hostname}`)\n } else {\n console.log(`\n It looks like you already have a custom parser for this url.\n The page you linked to has been added to ${file}. Copy and paste\n the following code to use that page in your tests:\n const html = fs.readFileSync('${file}');`)\n }\n })\n}\n\nfunction generateScaffold(url, file, result) {\n const { hostname } = URL.parse(url);\n const extractor = extractorTemplate(hostname, extractorName(hostname))\n const extractorTest = extractorTestTemplate(file, url, getDir(url), result, extractorName(hostname))\n\n fs.writeFileSync(`${getDir(url)}/index.js`, extractor)\n fs.writeFileSync(`${getDir(url)}/index.test.js`, extractorTest)\n fs.appendFileSync(\n './src/extractors/custom/index.js',\n exportString(url),\n )\n exec(`npm run lint-fix-quiet -- ${getDir(url)}/*.js`)\n}\n\nfunction extractorName(hostname) {\n const name = hostname\n .split('.')\n .map(w => `${w.charAt(0).toUpperCase()}${w.slice(1)}`)\n .join('')\n return `${name}Extractor`\n}\n\nfunction exportString(url) {\n const { hostname } = URL.parse(url);\n return `export * from './${hostname}';`;\n}\n\nfunction confirmCreateDir(dir, msg) {\n if (!fs.existsSync(dir)) {\n confirm(fs.mkdirSync, [dir], msg);\n }\n}\n\nfunction scaffoldCustomParser(url) {\n const dir = getDir(url);\n const { hostname } = URL.parse(url);\n let newParser = false\n\n if (!fs.existsSync(dir)) {\n newParser = true\n confirmCreateDir(dir, `Creating ${hostname} directory`);\n confirmCreateDir(`./fixtures/${hostname}`, 'Creating fixtures directory');\n }\n\n confirm(Mercury.fetchResource, [url], 'Fetching fixture', newParser);\n}\n\nfunction getDir(url) {\n const { hostname } = URL.parse(url);\n return 
`./src/extractors/custom/${hostname}`;\n}\n"],"names":["ex","_interopDefault","require$$20","require$$19","require$$18","require$$17","require$$16","require$$15","require$$14","require$$13","require$$12","require$$11","require$$10","require$$9","require$$8","require$$7","require$$6","require$$5","require$$4","require$$3","require$$2","require$$1","require$$0","range","map","_regeneratorRuntime","mark","arguments","length","undefined","wrap","_context","prev","next","start","end","stop","_marked","_ref","hostname","badUrl","error","messages","BAD_CONTENT_TYPES","join","options","resolve","reject","request","err","response","body","statusMessage","statusCode","parseNon2xx","headers","contentType","_response$headers","contentLength","BAD_CONTENT_TYPES_RE","test","MAX_CONTENT_LENGTH","_asyncToGenerator","url","parsedUrl","_ref3","URL","parse","encodeURI","_extends","REQUEST_HEADERS","timeout","FETCH_TIMEOUT","encoding","jar","gzip","followAllRedirects","sent","validateResponse","abrupt","t0","Errors","_callee","_x2","_x3","apply","$","from","to","each","_","node","$node","attr","value","removeAttr","convertMetaProp","img","_Reflect$ownKeys","attribs","forEach","IS_LINK","IS_IMAGE","index","type","root","find","contents","filter","isComment","remove","TAGS_TO_REMOVE","cleanComments","create","preparedResponse","validResponse","result","_this","generateDoc","includes","cheerio","load","content","normalizeWhitespace","children","normalizeMetaTags","convertLazyLoadedImages","clean","extractor","domains","reduce","acc","domain","supportedDomains","merge","concat","_toConsumableArray","selectors","transforms","noscript","author","title","date_published","h1","$children","get","tagName","dek","defaultCleaner","parents","$parent","prepend","$tweetContainer","append","tweets","replaceWith","s","src","replace","width","lead_image_url","next_page_url","excerpt","h2","split","youtubeId","JSON","data","sources","$img","iframe","decodeURIComponent","ytRe","thumb","match","_thumb$matc
h2","_slicedToArray","_thumb$match","clone","BloggerExtractor","NYMagExtractor","WikipediaExtractor","TwitterExtractor","NYTimesExtractor","TheAtlanticExtractor","NewYorkerExtractor","WiredExtractor","MSNExtractor","YahooExtractor","BuzzfeedExtractor","WikiaExtractor","LittleThingsExtractor","PoliticoExtractor","DeadspinExtractor","BroadwayWorldExtractor","ApartmentTherapyExtractor","MediumExtractor","_Object$keys","CustomExtractors","key","mergeSupportedDomains","REMOVE_ATTRS","selector","WHITELIST_ATTRS","REMOVE_EMPTY_TAGS","tag","HEADER_TAGS","POSITIVE_SCORE_HINTS","NEGATIVE_SCORE_HINTS","BLOCK_LEVEL_TAGS","UNLIKELY_CANDIDATES_BLACKLIST","candidatesBlacklist","UNLIKELY_CANDIDATES_WHITELIST","candidatesWhitelist","not","id","classes","CANDIDATES_WHITELIST","classAndId","CANDIDATES_BLACKLIST","element","nextElement","collapsing","paragraphize","br","nextSibling","sibling","BLOCK_LEVEL_TAGS_RE","appendTo","p","div","$div","DIV_TO_P_BLOCK_TAGS","convertable","convertNodeTo","span","$span","brsToPs$$1","convertDivs","convertSpans","_$node$get","attribString","parseInt","height","SPACER_RE","$article","cleanForHeight","removeSpacers","article","tags","KEEP_SELECTORS","protocol","_URL$parse","addClass","KEEP_CLASS","STRIP_OUTPUT_TAGS","removeClass","$hOnes","WHITELIST_ATTRS_RE","_defineProperty","parent","$p","text","trim","NON_TOP_CANDIDATE_TAGS$1","PHOTO_HINTS$1","POSITIVE_SCORE_HINTS$1","NEGATIVE_SCORE_HINTS$1","UNLIKELY_CANDIDATES_BLACKLIST$1","UNLIKELY_CANDIDATES_WHITELIST$1","POSITIVE_SCORE_RE$1","score","NEGATIVE_SCORE_RE$1","PHOTO_HINTS_RE$1","READABILITY_ASSET$1","textLength","chunks","idkRe","lengthBonus","min","Math","max","scoreCommas","scoreLength","slice","amount","getOrInitScore$$1","setScore","e","addScore$$1","getScore","scoreNode$$1","weightNodes","getWeight","addToParent$$1","PARAGRAPH_SCORE_TAGS$1","CHILD_CONTENT_TAGS$1","BAD_TAGS$1","convertSpans$1","addScoreTo","rawScore","HNEWS_CONTENT_SELECTORS$1","parentSelector","_ref2","childSelector","scorePs
","NORMALIZE_RE","regexList","re","matchRe","exec","PAGE_IN_HREF_RE","matches","pageNum","segment","firstSegmentHasLetters","IS_DIGIT_RE","goodSegment","toLowerCase","parsed","host","path","reverse","rawSegment","_segment$split2","_segment$split","possibleSegment","fileExt","IS_ALPHA_RE","HAS_ALPHA_RE","isGoodSegment","push","cleanedSegments","words","$candidate","topScore","NON_TOP_CANDIDATE_TAGS_RE$1","$sibling","siblingScore","wrappingDiv","linkDensity","density","contentBonus","newScore","siblingScoreThreshold","siblingContent","siblingContentLength","hasSentenceEnd","first","mergeSiblings","weight","hasClass","normalizeSpaces","inputCount","pCount","imgCount","nodeIsList","previousNode","scriptCount","CLEAN_CONDITIONALLY_TAGS","removeUnlessContent","HEADER_TAG_LIST","header","$header","prevAll","rootUrl","$content","absoluteUrl","linkText","totalTextLength","linkLength","metaNames","cachedNames","name","indexOf","_step","nodes","toArray","values","cleanTags$$1","metaValue","stripTags","v","_getIterator","foundNames","_iteratorNormalCompletion","_iterator","done","_loop","_typeof","_ret","_didIteratorError","_iteratorError","return","maxChildren","withinComment","isGoodNode","textOnly","html","cleanText","class","IS_WP_SELECTOR","months","timestamp1","timestamp2","allMonths","CLEAN_AUTHOR_RE","leadImageUrl","validUrl","isWebUri","excerptContent","TEXT_LINK_RE","dekText","dateString","SPLIT_DATE_STRING","TIME_MERIDIAN_DOTS_RE","TIME_MERIDIAN_SPACE_RE","CLEAN_DATE_STRING_RE","MS_DATE_STRING","SEC_DATE_STRING","moment","date","isValid","cleanDateString","toISOString","_ref$cleanConditional","cleanConditionally","_ref$title","_ref$url","_ref$defaultCleaner","rewriteTopLevel$$1","cleanImages","markToKeep","stripJunkTags","cleanHOnes$$1","cleanHeaders","makeLinksAbsolute","removeEmpty","cleanAttributes","TITLE_SPLITTERS_RE","resolveSplitTitle","splitTitle","titleText","termCounts","_Reflect$ownKeys$redu2","_Reflect$ownKeys$redu","maxTerm","termCount","splitEnds","long
estEnd","DOMAIN_ENDINGS_RE","wuzzy","levenshtein","startSlug","nakedDomain","startSlugRatio","endSlug","endSlugRatio","extractBreadcrumbTitle","newTitle","cleanDomainFromTitle","cleanAuthor","clean$1","cleanDek","cleanDatePublished","extractCleanNode","cleanTitle$$1","opts","stripUnlikelyCandidates","convertToParagraphs$$1","scoreContent$$1","findTopCandidate$$1","defaultOpts","extract","getContentNode","nodeIsSufficient","cleanAndReturnNode","k","extractBestNode","metaCache","extractFromMeta$$1","STRONG_TITLE_META_TAGS","extractFromSelectors$$1","STRONG_TITLE_SELECTORS","WEAK_TITLE_META_TAGS","WEAK_TITLE_SELECTORS","bylineRe","AUTHOR_META_TAGS","AUTHOR_MAX_LENGTH","AUTHOR_SELECTORS","BYLINE_SELECTORS_RE","_ref4","regex","abbrevMonthsStr","datePublished","DATE_PUBLISHED_META_TAGS","DATE_PUBLISHED_SELECTORS","extractFromUrl","DATE_PUBLISHED_URL_RES","POSITIVE_LEAD_IMAGE_URL_HINTS","NEGATIVE_LEAD_IMAGE_URL_HINTS","POSITIVE_LEAD_IMAGE_URL_HINTS_RE","NEGATIVE_LEAD_IMAGE_URL_HINTS_RE","GIF_RE","JPG_RE","$figParent","$gParent","getSig","parseFloat","area","round","$imgs","LEAD_IMAGE_URL_META_TAGS","imageUrl","cleanUrl","imgs","scoreImageUrl","scoreAttr","scoreByParents","scoreBySibling","scoreByDimensions","scoreByPosition","imgScores","topUrl","LEAD_IMAGE_URL_SELECTORS","href","articleUrl","SequenceMatcher","ratio","similarity","diffPercent","diffModifier","linkTextAsNum","isWp","EXTRANEOUS_LINK_HINTS$1","EXTRANEOUS_LINK_HINTS_RE$1","$link","_Array$from","makeSig$1","positiveMatch","PAGE_RE","parentData","negativeMatch","NEGATIVE_SCORE_RE","POSITIVE_SCORE_RE","linkData","PREV_LINK_TEXT_RE$1","baseUrl","previousUrls","linkHost","DIGIT_RE$2","fragment","baseRegex","NEXT_LINK_TEXT_RE$1","CAP_LINK_TEXT_RE$1","links","_ref$previousUrls","makeBaseRegex","isWordpress","possiblePages","link","removeAnchor","shouldScore","makeSig","pageNumFromUrl","scoreBaseUrl","scoreNextLinkText","scoreCapLinks","scorePrevLink","scoreByParents$1","scoreExtraneousLinks","scorePageInLink","scoreL
inkText","scoreSimilarity","possiblePage","scoredPages","articleBaseUrl","scoreLinks","scoredLinks","scoredLink","topPage","parseDomain","$canonical","CANONICAL_META_SELECTORS","metaUrl","maxLength","ellipse","EXCERPT_META_SELECTORS","shortContent","GenericTitleExtractor","GenericDatePublishedExtractor","GenericAuthorExtractor","GenericContentExtractor","bind","GenericLeadImageUrlExtractor","GenericDekExtractor","GenericNextPageUrlExtractor","url_and_domain","GenericUrlExtractor","GenericExcerptExtractor","word_count","GenericWordCountExtractor","direction","getDirection","_url_and_domain","_parsedUrl","Extractors","baseDomain","GenericExtractor","$matches","Array","isArray","_selector","extractionOpts","_opts$extractHtml","extractHtml","_extractionOpts$defau","findMatchingSelector","matchingSelector","transformElements","cleanBySelectors","Cleaners","_matchingSelector","_opts$fallback","fallback","select","contentOnly","_opts","extractedTitle","extractResult","_content","Extractor","extractorOpts","nextPageResult","pages","RootExtractor","total_pages","pages_rendered","_x","fetchAllPages","_result","_opts$fetchAllPages","validateUrl","getExtractor","rendered_pages","fetchResource","_context2","_callee2","_this2","Mercury","RegExp","REMOVE_ATTR_SELECTORS","REMOVE_ATTR_LIST","REMOVE_EMPTY_SELECTORS","brsToPs","removeAllButWhitelist","NON_TOP_CANDIDATE_TAGS","NON_TOP_CANDIDATE_TAGS_RE","HNEWS_CONTENT_SELECTORS","PHOTO_HINTS","PHOTO_HINTS_RE","READABILITY_ASSET","DIGIT_RE","BR_TAGS_RE","BR_TAG_RE","UNLIKELY_RE","PARAGRAPH_SCORE_TAGS","CHILD_CONTENT_TAGS","BAD_TAGS","HTML_OR_BODY_RE","scoreParagraph","addScore","getOrInitScore","addToParent","scoreNode","SENTENCE_END_RE","absolutize","commentParent","insertValues","strings","part","idx","toString","bodyPattern","trailingWhitespace","template","compiled","indentLevel","line","IGNORE","testFor","dir","file","questions","inquirer","prompt","then","answers","website","spinner","confirm","fn","args","msg","newParser","ora","
savePage","r","succeed","filename","Date","getTime","writeFileSync","generateScaffold","log","extractorTemplate","extractorName","extractorTest","extractorTestTemplate","getDir","appendFileSync","exportString","w","charAt","toUpperCase","confirmCreateDir","fs","existsSync","mkdirSync","scaffoldCustomParser"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAEA,0BAAA,CAA0BA,EAA1B,CAA8B,CAAE,WAAe,UAAA,mCAAOA,EAAP,KAAc,QAArB,EAAkC,cAAnC,CAAsDA,GAAG,SAAH,CAAtD,CAAsEA,EAA7E,CAAkF,CAElH,wBAA0BC,kBAAgBC,WAAhB,CAA1B,CACA,eAAeD,kBAAgBE,QAAhB,CAAf,CACA,sBAAwBF,kBAAgBG,gBAAhB,CAAxB,CACA,UAAUH,kBAAgBI,GAAhB,CAAV,CACA,cAAcJ,kBAAgBK,OAAhB,CAAd,CACA,aAAeL,kBAAgBM,OAAhB,CAAf,CACA,cAAcN,kBAAgBO,OAAhB,CAAd,CACA,uBAAuBP,kBAAgBQ,gBAAhB,CAAvB,CACA,iBAAmBR,kBAAgBS,IAAhB,CAAnB,CACA,uBAAyBT,kBAAgBU,iBAAhB,CAAzB,CACA,qBAAqBV,kBAAgBW,cAAhB,CAArB,CACA,sBAAsBX,kBAAgBY,eAAhB,CAAtB,CACA,mBAAmBZ,kBAAgBa,YAAhB,CAAnB,CACA,oBAAsBb,kBAAgBc,cAAhB,CAAtB,CACA,cAAcd,kBAAgBe,OAAhB,CAAd,CACA,eAAef,kBAAgBgB,QAAhB,CAAf,CACA,aAAahB,kBAAgBiB,MAAhB,CAAb,CACA,YAAYjB,kBAAgBkB,KAAhB,CAAZ,CACA,cAAclB,kBAAgBmB,OAAhB,CAAd,CACA,gBAAkBnB,kBAAgBoB,IAAhB,CAAlB,CACA,gBAAgBpB,kBAAgBqB,SAAhB,CAAhB,CAEA,YAAc,CAACC,KAAD,EAAQC,GAAR,CAAYC,oBAAoBC,IAAhC,CAAd,CAEA,cAAA,EAAiB,CACf,UAAYC,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,CAAhF,CACA,QAAUA,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,CAA9E,CACA,2BAA2BG,IAApB,CAAyB,eAAA,CAAgBC,QAAhB,CAA0B,CACxD,MAAO,CAAP,CAAU,CACR,OAAQA,SAASC,IAAT,CAAgBD,SAASE,IAAjC,EACE,MAAA,CACE,GAAI,EAAEC,OAASC,GAAX,CAAJ,CAAqB,CACnBJ,SAASE,IAAT,CAAgB,CAAhB,CACA,MACD,CAEDF,SAASE,IAAT,CAAgB,CAAhB,CACA,cAAgB,CAAhB,CAEF,MAAA,CACEF,SAASE,IAAT,CAAgB,CAAhB,CACA,MAEF,MAAA,CACA,IAAK,KAAL,CACE,gBAAgBG,IAAT,EAAP,CAhBJ,CAkBD,CACF,CArBM,CAqBJC,QAAQ,CAAR,CArBI,CAqBQ,IArBR,CAAP,CAsBD;AAGD,oBAAA,CAAqBC,IAArB,CAA2B,CACzB,aAAeA,KAAKC,QAApB;AAGA,MAAO,CAAC,CAACA,QAAT,CACD,CAED,WAAa,CACXC,OAAQ,CACNC,MAAO,IADD,CAENC,SAAU,gGAFJ,CADG,CAAb,CAOA,oBAAsB,CACpB,aA
Ac,6CADM,CAAtB;AAKA,kBAAoB,KAApB;AAGA,sBAAwB,CAAC,YAAD,CAAe,WAAf,CAA4B,YAA5B,CAA0C,WAA1C,CAAxB,CAEA,yBAA2B,UAAA,CAAW,KAAOC,kBAAkBC,IAAlB,CAAuB,GAAvB,CAAP,CAAqC,IAAhD,CAAsD,GAAtD,CAA3B;;AAIA,uBAAyB,OAAzB;;;AAMA,YAAA,CAAaC,OAAb,CAAsB,CACpB,mBAAO,CAAa,SAAUC,OAAV,CAAmBC,MAAnB,CAA2B,CAC7CC,UAAQH,OAAR,CAAiB,SAAUI,GAAV,CAAeC,QAAf,CAAyBC,IAAzB,CAA+B,CAC9C,GAAIF,GAAJ,CAAS,CACPF,OAAOE,GAAP,EACD,CAFD,IAEO,CACLH,QAAQ,CAAEK,KAAMA,IAAR,CAAcD,SAAUA,QAAxB,CAAR,EACD,CACF,CAND,EAOD,CARM,CAAP,CASD;;;;AAOD,yBAAA,CAA0BA,QAA1B,CAAoC,CAClC,gBAAkBvB,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,KAAtF;AAGA,GAAIuB,SAASE,aAAT,GAA2B,IAA/B,CAAqC,CACnC,GAAI,CAACF,SAASG,UAAd,CAA0B,CACxB,eAAM,CAAU,mDAAqDH,SAAST,KAAxE,CAAN,CACD,CAFD,QAEW,CAACa,WAAL,CAAkB,CACvB,eAAM,CAAU,+CAAiDJ,SAASG,UAA1D,CAAuE,oEAAjF,CAAN,CACD,CACF,CAED,sBAAwBH,SAASK,OAAjC,CACIC,YAAcC,kBAAkB,cAAlB,CADlB,CAEIC,cAAgBD,kBAAkB,gBAAlB,CAFpB;AAMA,GAAIE,qBAAqBC,IAArB,CAA0BJ,WAA1B,CAAJ,CAA4C,CAC1C,eAAM,CAAU,sCAAwCA,WAAxC,CAAsD,sBAAhE,CAAN,CACD;AAGD,GAAIE,cAAgBG,kBAApB,CAAwC,CACtC,eAAM,CAAU,sEAAwEA,kBAAxE,CAA6F,GAAvG,CAAN,CACD,CAED,WAAA,CACD;;;;;;;AAYD,oBAAuB,UAAY,CACjC,UAAYC,kBAAkBrC,oBAAoBC,IAApB,CAAyB,gBAAA,CAAiBqC,GAAjB,CAAsBC,SAAtB,CAAiC,CACtF,WAAA,CAAaC,KAAb,CAAoBf,QAApB,CAA8BC,IAA9B,CAEA,2BAA2BrB,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAASC,IAAT,CAAgBD,SAASE,IAAjC,EACE,MAAA,CACE+B,UAAYA,WAAaE,MAAIC,KAAJ,CAAUC,UAAUL,GAAV,CAAV,CAAzB,CAEAlB,QAAU,CACRkB,IAAKC,SADG,CAERT,QAASc,WAAS,EAAT,CAAaC,eAAb,CAFD,CAGRC,QAASC,aAHD;;AAMRC,SAAU,IANF;AAQRC,IAAK,IARG;AAURC,KAAM,IAVE;AAYRC,mBAAoB,IAZZ,CAAV,CAcA7C,SAASE,IAAT,CAAgB,CAAhB,CACA,WAAWY,OAAJ,CAAP,CAEF,MAAA,CACEoB,MAAQlC,SAAS8C,IAAjB,CACA3B,SAAWe,MAAMf,QAAjB,CACAC,KAAOc,MAAMd,IAAb,CACApB,SAASC,IAAT,CAAgB,CAAhB,CAEA8C,iBAAiB5B,QAAjB,EACA,gBAAgB6B,MAAT,CAAgB,QAAhB,CAA0B,CAAE5B,KAAMA,IAAR,CAAcD,SAAUA,QAAxB,CAA1B,CAAP,CAEF,OAAA,CACEnB,SAASC,IAAT,CAAgB,EAAhB,CACAD,SAASiD,EAAT,CAAcjD,SAAS,OAAT,EAAkB,CAAlB,CAAd,CACA,gBAAgBgD,MAAT,CAAgB,
QAAhB,CAA0BE,OAAOzC,MAAjC,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgBJ,IAAT,EAAP,CArCJ,CAuCD,CACF,CA1CM,CA0CJ8C,OA1CI,CA0CK,IA1CL,CA0CW,CAAC,CAAC,CAAD,CAAI,EAAJ,CAAD,CA1CX,CAAP,CA2CD,CA9C6B,CAAlB,CAAZ,CAgDA,sBAAA,CAAuBC,GAAvB,CAA4BC,GAA5B,CAAiC,CAC/B,aAAaC,KAAN,CAAY,IAAZ,CAAkB1D,SAAlB,CAAP,CACD,CAED,oBAAA,CACD,CAtDqB,EAAtB,CAwDA,wBAAA,CAAyB2D,CAAzB,CAA4BC,OAA5B,CAAkCC,EAAlC,CAAsC,CACpCF,EAAE,QAAUC,OAAV,CAAiB,GAAnB,EAAwBE,IAAxB,CAA6B,SAAUC,CAAV,CAAaC,IAAb,CAAmB,CAC9C,UAAYL,EAAEK,IAAF,CAAZ,CAEA,UAAYC,MAAMC,IAAN,CAAWN,OAAX,CAAZ,CACAK,MAAMC,IAAN,CAAWL,EAAX,CAAeM,KAAf,EACAF,MAAMG,UAAN,CAAiBR,OAAjB,EACD,CAND,EAQA,QAAA,CACD;;;;;;AASD,0BAAA,CAA2BD,CAA3B,CAA8B,CAC5BA,EAAIU,gBAAgBV,CAAhB,CAAmB,SAAnB,CAA8B,OAA9B,CAAJ,CACAA,EAAIU,gBAAgBV,CAAhB,CAAmB,UAAnB,CAA+B,MAA/B,CAAJ,CACA,QAAA,CACD,CAED,YAAc,UAAA,CAAW,WAAX,CAAwB,GAAxB,CAAd,CACA,aAAe,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAf,CAEA,mBAAqB,CAAC,QAAD,CAAW,OAAX,CAAoB,MAApB,EAA4B1C,IAA5B,CAAiC,GAAjC,CAArB;;;;;AAOA,gCAAA,CAAiC0C,CAAjC,CAAoC,CAClCA,EAAE,KAAF,EAASG,IAAT,CAAc,SAAUC,CAAV,CAAaO,GAAb,CAAkB,CAC9BC,mBAAiBD,IAAIE,OAArB,EAA8BC,OAA9B,CAAsC,SAAUP,IAAV,CAAgB,CACpD,UAAYI,IAAIE,OAAJ,CAAYN,IAAZ,CAAZ,CAEA,GAAIA,OAAS,KAAT,EAAkBQ,QAAQzC,IAAR,CAAakC,KAAb,CAAlB,EAAyCQ,SAAS1C,IAAT,CAAckC,KAAd,CAA7C,CAAmE,CACjER,EAAEW,GAAF,EAAOJ,IAAP,CAAY,KAAZ,CAAmBC,KAAnB,EACD,CACF,CAND,EAOD,CARD,EAUA,QAAA,CACD,CAED,kBAAA,CAAmBS,KAAnB,CAA0BZ,IAA1B,CAAgC,CAC9B,YAAYa,IAAL,GAAc,SAArB,CACD,CAED,sBAAA,CAAuBlB,CAAvB,CAA0B,CACxBA,EAAEmB,IAAF,GAASC,IAAT,CAAc,GAAd,EAAmBC,QAAnB,GAA8BC,MAA9B,CAAqCC,SAArC,EAAgDC,MAAhD,GAEA,QAAA,CACD,CAED,cAAA,CAAexB,CAAf,CAAkB,CAChBA,EAAEyB,cAAF,EAAkBD,MAAlB,GAEAxB,EAAI0B,cAAc1B,CAAd,CAAJ,CACA,QAAA,CACD,CAED,aAAe;;;;;;AAQb2B,OAAQ,eAAA,CAAgBlD,GAAhB,CAAqBmD,gBAArB,CAAuClD,SAAvC,CAAkD,CACxD,UAAY,IAAZ,CAEA,yBAAyBvC,oBAAoBC,IAApB,CAAyB,gBAAA,EAAmB,CACnE,UAAA,CAAYyF,aAAZ,CACA,2BAA2BrF,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAASC,IAAT,CAAgBD,SAASE,IAAjC,EACE,MAAA,CACEmF,OAAS,MAAT,CAEA,GAAI,CAACF,gBAAL,CAAuB,CACrBnF,SA
ASE,IAAT,CAAgB,CAAhB,CACA,MACD,CAEDkF,cAAgB,CACd/D,cAAe,IADD,CAEdC,WAAY,GAFE,CAGdE,QAAS,CACP,eAAgB,WADT,CAEP,iBAAkB,GAFX,CAHK,CAAhB,CAUA6D,OAAS,CAAEjE,KAAM+D,gBAAR,CAA0BhE,SAAUiE,aAApC,CAAT,CACApF,SAASE,IAAT,CAAgB,CAAhB,CACA,MAEF,MAAA,CACEF,SAASE,IAAT,CAAgB,CAAhB,CACA,uBAAuB8B,GAAhB,CAAqBC,SAArB,CAAP,CAEF,MAAA,CACEoD,OAASrF,SAAS8C,IAAlB,CAEF,MAAA,CACE,GAAI,CAACuC,OAAO3E,KAAZ,CAAmB,CACjBV,SAASE,IAAT,CAAgB,EAAhB,CACA,MACD,CAED,gBAAgB8C,MAAT,CAAgB,QAAhB,CAA0BqC,MAA1B,CAAP,CAEF,OAAA,CACE,gBAAgBrC,MAAT,CAAgB,QAAhB,CAA0BsC,MAAMC,WAAN,CAAkBF,MAAlB,CAA1B,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgBhF,IAAT,EAAP,CA3CJ,CA6CD,CACF,CAhDM,CAgDJ8C,OAhDI,CAgDKmC,KAhDL,CAAP,CAiDD,CAnDwB,CAAlB,GAAP,CAoDD,CA/DY,CAgEbC,YAAa,oBAAA,CAAqBhF,IAArB,CAA2B,CACtC,YAAcA,KAAKa,IAAnB,CACID,SAAWZ,KAAKY,QADpB,CAEA,gBAAkBA,SAASK,OAAT,CAAiB,cAAjB,CAAlB;;AAKA,GAAI,CAACC,YAAY+D,QAAZ,CAAqB,MAArB,CAAD,EAAiC,CAAC/D,YAAY+D,QAAZ,CAAqB,MAArB,CAAtC,CAAoE,CAClE,eAAM,CAAU,qCAAV,CAAN,CACD,CAED,MAAQC,UAAQC,IAAR,CAAaC,OAAb,CAAsB,CAAEC,oBAAqB,IAAvB,CAAtB,CAAR,CAEA,GAAIrC,EAAEmB,IAAF,GAASmB,QAAT,GAAoBhG,MAApB,GAA+B,CAAnC,CAAsC,CACpC,eAAM,CAAU,kCAAV,CAAN,CACD,CAED0D,EAAIuC,kBAAkBvC,CAAlB,CAAJ,CACAA,EAAIwC,wBAAwBxC,CAAxB,CAAJ,CACAA,EAAIyC,MAAMzC,CAAN,CAAJ,CAEA,QAAA,CACD,CAvFY,CAAf,CA0FA,UAAY,cAAA,CAAe0C,SAAf,CAA0BC,OAA1B,CAAmC,CAC7C,eAAeC,MAAR,CAAe,SAAUC,GAAV,CAAeC,MAAf,CAAuB,CAC3CD,IAAIC,MAAJ,EAAcJ,SAAd,CACA,UAAA,CACD,CAHM,CAGJ,EAHI,CAAP,CAID,CALD,CAOA,8BAAA,CAA+BA,SAA/B,CAA0C,CACxC,iBAAiBK,gBAAV,CAA6BC,MAAMN,SAAN,CAAiB,CAACA,UAAUI,MAAX,EAAmBG,MAAnB,CAA0BC,mBAAmBR,UAAUK,gBAA7B,CAA1B,CAAjB,CAA7B,CAA2HC,MAAMN,SAAN,CAAiB,CAACA,UAAUI,MAAX,CAAjB,CAAlI,CACD,CAED,qBAAuB,CACrBA,OAAQ,cADa,CAErBV,QAAS;;;AAIPe,UAAW,CAAC,wBAAD,CAJJ;AAOPV,MAAO,EAPA;AAUPW,WAAY,CACVC,SAAU,KADA,CAVL,CAFY,CAiBrBC,OAAQ,CACNH,UAAW,CAAC,mBAAD,CADL,CAjBa,CAqBrBI,MAAO,CACLJ,UAAW,CAAC,gBAAD,CADN,CArBc,CAyBrBK,eAAgB,CACdL,UAAW,CAAC,kBAAD,CADG,CAzBK,CAAvB,CA8BA,mBAAqB,CACnBL,OAAQ,WADW,CAEnBV,QAAS;AAEPe,UAAW,CAAC,qBAAD,CAAwB,cAAxB,CAAwC,iBAAxC,CAFJ;AAKPV,MAAO,CA
AC,KAAD,CAAQ,uBAAR,CALA;;;;;;AAaPW,WAAY;AAEVK,GAAI,IAFM;AAKVJ,SAAU,iBAAA,CAAkB/C,KAAlB,CAAyB,CACjC,cAAgBA,MAAMgC,QAAN,EAAhB,CACA,GAAIoB,UAAUpH,MAAV,GAAqB,CAArB,EAA0BoH,UAAUC,GAAV,CAAc,CAAd,EAAiBC,OAAjB,GAA6B,KAA3D,CAAkE,CAChE,MAAO,QAAP,CACD,CAED,WAAA,CACD,CAZS,CAbL,CAFU,CA+BnBL,MAAO,CACLJ,UAAW,CAAC,uBAAD,CAA0B,qBAA1B,CAAiD,IAAjD,CADN,CA/BY,CAmCnBG,OAAQ,CACNH,UAAW,CAAC,aAAD,CAAgB,sBAAhB,CADL,CAnCW,CAuCnBU,IAAK,CACHV,UAAW,CAAC,sBAAD,CADR,CAvCc,CA2CnBK,eAAgB,CACdL,UAAW,CAAC,CAAC,kCAAD,CAAqC,UAArC,CAAD,CAAmD,wBAAnD,CADG,CA3CG,CAArB,CAgDA,uBAAyB,CACvBL,OAAQ,eADe,CAEvBV,QAAS,CACPe,UAAW,CAAC,kBAAD,CADJ,CAGPW,eAAgB,KAHT;AAMPV,WAAY,CACV,eAAgB,mBAAA,CAAoB9C,KAApB,CAA2B,CACzC,YAAcA,MAAMyD,OAAN,CAAc,UAAd,CAAd;AAEA,GAAIC,QAAQ1B,QAAR,CAAiB,KAAjB,EAAwBhG,MAAxB,GAAmC,CAAvC,CAA0C,CACxC0H,QAAQC,OAAR,CAAgB3D,KAAhB,EACD,CACF,CAPS,CAQV,mBAAoB,YARV,CASV,WAAY,QATF,CANL;AAmBPmC,MAAO,CAAC,iBAAD,CAAoB,oCAApB,CAA0D,MAA1D,CAAkE,SAAlE,CAnBA,CAFc,CAyBvBa,OAAQ,wBAzBe,CA2BvBC,MAAO,CACLJ,UAAW,CAAC,UAAD,CADN,CA3BgB,CA+BvBK,eAAgB,CACdL,UAAW,CAAC,sBAAD,CADG,CA/BO,CAAzB,CAqCA,qBAAuB,CACrBL,OAAQ,aADa,CAGrBV,QAAS,CACPgB,WAAY;;;;AAKV,wBAAyB,0BAAA,CAA2B9C,KAA3B,CAAkCN,CAAlC,CAAqC,CAC5D,WAAaM,MAAMc,IAAN,CAAW,QAAX,CAAb,CACA,oBAAsBpB,EAAE,iCAAF,CAAtB,CACAkE,gBAAgBC,MAAhB,CAAuBC,MAAvB,EACA9D,MAAM+D,WAAN,CAAkBH,eAAlB,EACD,CAVS;;AAcVI,EAAG,MAdO,CADL,CAkBPnB,UAAW,CAAC,uBAAD,CAlBJ,CAoBPW,eAAgB,KApBT,CAsBPrB,MAAO,CAAC,qBAAD,CAAwB,QAAxB,CAAkC,sBAAlC,CAtBA,CAHY,CA4BrBa,OAAQ,CACNH,UAAW,CAAC,kCAAD,CADL,CA5Ba,CAgCrBK,eAAgB,CACdL,UAAW,CAAC,CAAC,4CAAD,CAA+C,cAA/C,CAAD,CADG,CAhCK,CAAvB,CAsCA,qBAAuB,CACrBL,OAAQ,iBADa,CAGrBS,MAAO,CACLJ,UAAW,CAAC,aAAD,CAAgB,aAAhB,CADN,CAHc,CAOrBG,OAAQ,CACNH,UAAW,CAAC,CAAC,qBAAD,CAAwB,OAAxB,CAAD,CAAmC,WAAnC,CAAgD,SAAhD,CADL,CAPa,CAWrBf,QAAS,CACPe,UAAW,CAAC,cAAD,CAAiB,eAAjB,CADJ,CAGPW,eAAgB,KAHT,CAKPV,WAAY,CACV,aAAc,iBAAA,CAAkB9C,KAAlB,CAAyB,CACrC,QAAUA,MAAMC,IAAN,CAAW,KAAX,CAAV;;;;;;;;;AAUA,UAAY,GAAZ,CAEAgE,IAAMA,IAAIC,OAAJ,CAAY,UAAZ,CAAwBC,KAAxB,CAAN,CACAnE,MAAMC,IAAN,CAAW,KAAX,CAAkBgE,GAAlB,
EACD,CAhBS,CALL,CAwBP9B,MAAO,CAAC,KAAD,CAAQ,qBAAR,CAA+B,2BAA/B,CAA4D,kBAA5D,CAAgF,mBAAhF,CAAqG,QAArG,CAA+G,kBAA/G,CAAmI,SAAnI,CAxBA,CAXY,CAsCrBe,eAAgB,IAtCK,CAwCrBkB,eAAgB,IAxCK,CA0CrBb,IAAK,IA1CgB,CA4CrBc,cAAe,IA5CM,CA8CrBC,QAAS,IA9CY,CAAvB;;AAmDA,yBAA2B,CACzB9B,OAAQ,qBADiB,CAEzBS,MAAO,CACLJ,UAAW,CAAC,QAAD,CADN,CAFkB,CAMzBG,OAAQ,CACNH,UAAW,CAAC,0DAAD,CADL,CANiB,CAUzBf,QAAS,CACPe,UAAW,CAAC,eAAD,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,EAVA,CAVgB,CAuBzBe,eAAgB,IAvBS,CAyBzBkB,eAAgB,IAzBS,CA2BzBb,IAAK,IA3BoB,CA6BzBc,cAAe,IA7BU,CA+BzBC,QAAS,IA/BgB,CAA3B;;;AAqCA,uBAAyB,CACvB9B,OAAQ,mBADe,CAEvBS,MAAO,CACLJ,UAAW,CAAC,UAAD,CADN,CAFgB,CAMvBG,OAAQ,CACNH,UAAW,CAAC,eAAD,CADL,CANe,CAUvBf,QAAS,CACPe,UAAW,CAAC,iBAAD,CAAoB,iBAApB,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,EAVA,CAVc,CAuBvBe,eAAgB,CACdL,UAAW,CAAC,CAAC,qCAAD,CAAwC,OAAxC,CAAD,CADG,CAvBO,CA2BvBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BO,CA+BvBU,IAAK,CACHV,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/BkB,CAmCvBwB,cAAe,IAnCQ,CAqCvBC,QAAS,IArCc,CAAzB;;;AA2CA,mBAAqB,CACnB9B,OAAQ,eADW,CAEnBS,MAAO,CACLJ,UAAW,CAAC,eAAD,CADN,CAFY,CAMnBG,OAAQ,CACNH,UAAW,CAAC,iBAAD,CADL,CANW,CAUnBf,QAAS,CACPe,UAAW,CAAC,iBAAD,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,CAAC,kBAAD,CAVA,CAVU,CAuBnBe,eAAgB,CACdL,UAAW,CAAC,CAAC,gCAAD,CAAmC,OAAnC,CAAD,CADG,CAvBG,CA2BnBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BG,CA+BnBU,IAAK,CACHV,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/Bc,CAmCnBwB,cAAe,IAnCI,CAqCnBC,QAAS,IArCU,CAArB;;;AA2CA,iBAAmB,CACjB9B,OAAQ,aADS,CAEjBS,MAAO,CACLJ,UAAW,CAAC,IAAD,CADN,CAFU,CAMjBG,OAAQ,CACNH,UAAW,CAAC,qBAAD,CADL,CANS,CAUjBf,QAAS,CACPe,UAAW,CAAC,cAAD,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,CAAC,cAAD,CAVA,CAVQ,CAuBjBe,eAAgB,CACdL,UAAW,CAAC,WAAD,CADG,CAvBC,CA2BjBuB,eAAgB,CACdvB,UAAW,EADG,CA3BC,CA+BjBU,IAAK,CACHV,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CA/BY,CAmCjBwB,cAAe,IAnCE,CAqCjBC,QAAS,IArCQ,CAAnB;;;AA2CA,mBAAqB,CACnB9B,OAAQ,eADW,CAEnBS,MAAO,CACLJ,UAAW,CAAC,sBAAD,CADN,CAFY,CAMnBG,OAAQ,CACNH,UAAW,CAAC,oBAAD,CADL
,CANW,CAUnBf,QAAS,CACPe,UAAW;AAEX,iBAFW,CADJ;;AAOPC,WAAY,EAPL;;;AAYPX,MAAO,CAAC,iBAAD,CAZA,CAVU,CAyBnBe,eAAgB,CACdL,UAAW,CAAC,CAAC,qBAAD,CAAwB,UAAxB,CAAD,CADG,CAzBG,CA6BnBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA7BG,CAiCnBU,IAAK,CACHV,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CAjCc,CAqCnBwB,cAAe,IArCI,CAuCnBC,QAAS,IAvCU,CAArB;;;AA6CA,sBAAwB,CACtB9B,OAAQ,kBADc,CAEtBS,MAAO,CACLJ,UAAW,CAAC,qBAAD,CADN,CAFe,CAMtBG,OAAQ,CACNH,UAAW,CAAC,gCAAD,CAAmC,gBAAnC,CADL,CANc,CAUtBf,QAAS,CACPe,UAAW,CAAC,gBAAD,CADJ,CAGPW,eAAgB,KAHT;;AAOPV,WAAY,CACVyB,GAAI,GADM,CAPL;;;AAcPpC,MAAO,CAAC,oBAAD,CAAuB,uEAAvB,CAAgG,YAAhG,CAdA,CAVa,CA2BtBe,eAAgB,CACdL,UAAW,CAAC,gBAAD,CADG,CA3BM,CA+BtBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA/BM,CAmCtBU,IAAK,CACHV,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CAnCiB,CAuCtBwB,cAAe,IAvCO,CAyCtBC,QAAS,IAzCa,CAAxB;;;AA+CA,mBAAqB,CACnB9B,OAAQ,kBADW,CAEnBS,MAAO,CACLJ,UAAW,CAAC,gBAAD,CADN,CAFY,CAMnBG,OAAQ,CACNH,UAAW,CAAC,eAAD,CAAkB,KAAlB,CADL,CANW,CAUnBf,QAAS,CACPe,UAAW,CAAC,eAAD,CAAkB,gBAAlB,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,EAVA,CAVU,CAuBnBe,eAAgB,CACdL,UAAW,CAAC,CAAC,qCAAD,CAAwC,OAAxC,CAAD,CADG,CAvBG,CA2BnBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BG,CA+BnBU,IAAK,CACHV,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/Bc,CAmCnBwB,cAAe,IAnCI,CAqCnBC,QAAS,IArCU,CAArB;;;AA2CA,0BAA4B,CAC1B9B,OAAQ,sBADkB,CAE1BS,MAAO,CACLJ,UAAW,CAAC,eAAD,CADN,CAFmB,CAM1BG,OAAQ,CACNH,UAAW,CAAC,CAAC,qBAAD,CAAwB,OAAxB,CAAD,CADL,CANkB,CAU1Bf,QAAS,CACPe,UAAW;AAEX,mBAFW,CAEU,kBAFV,CADJ;;AAOPC,WAAY,EAPL;;;AAYPX,MAAO,EAZA,CAViB,CAyB1BiC,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAzBU,CA6B1BwB,cAAe,IA7BW,CA+B1BC,QAAS,IA/BiB,CAA5B;;;AAqCA,sBAAwB,CACtB9B,OAAQ,kBADc,CAEtBS,MAAO,CACLJ,UAAW;AAEX,CAAC,uBAAD,CAA0B,OAA1B,CAFW,CADN,CAFe,CAQtBG,OAAQ,CACNH,UAAW,CAAC,oCAAD,CADL,CARc,CAYtBf,QAAS,CACPe,UAAW;AAEX,qBAFW,CAEY,gBAFZ,CAE8B,aAF9B,CAE6C,aAF7C,CADJ;;AAOPC,WAAY,EAPL;;;AAYPX,MAAO,CAAC,YAAD,CAZA,CAZa,CA2BtBe,eAAgB,CACdL,UAAW,CAAC,CAAC,
+CAAD,CAAkD,UAAlD,CAAD,CADG,CA3BM,CA+BtBuB,eAAgB,CACdvB,UAAW;AAEX,CAAC,uBAAD,CAA0B,OAA1B,CAFW,CADG,CA/BM,CAqCtBU,IAAK,CACHV,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CArCiB,CAyCtBwB,cAAe,IAzCO,CA2CtBC,QAAS,IA3Ca,CAAxB,CA8CA,sBAAwB,CACtB9B,OAAQ,cADc,CAGtBC,iBAAkB,CAAC,aAAD,CAAgB,gBAAhB,CAAkC,YAAlC,CAAgD,aAAhD,CAA+D,cAA/D,CAA+E,WAA/E,CAHI,CAKtBQ,MAAO,CACLJ,UAAW,CAAC,aAAD,CADN,CALe,CAStBG,OAAQ,CACNH,UAAW,CAAC,SAAD,CADL,CATc,CAatBf,QAAS,CACPe,UAAW,CAAC,eAAD,CAAkB,gBAAlB,CADJ;;AAKPC,WAAY,CACV,mDAAoD,6CAAA,CAA8C9C,KAA9C,CAAqD,CACvG,cAAgBA,MAAMC,IAAN,CAAW,IAAX,EAAiBuE,KAAjB,CAAuB,UAAvB,EAAmC,CAAnC,CAAhB,CACAxE,MAAMC,IAAN,CAAW,KAAX,CAAkB,iCAAmCwE,SAArD,EACD,CAJS,CALL;;;AAePtC,MAAO,EAfA,CAba,CA+BtBe,eAAgB,CACdL,UAAW,CAAC,CAAC,wBAAD,CAA2B,UAA3B,CAAD,CADG,CA/BM,CAmCtBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAnCM,CAuCtBU,IAAK,CACHV,UAAW;CADR,CAvCiB,CA6CtBwB,cAAe,CACbxB,UAAW;CADE,CA7CO,CAmDtByB,QAAS,CACPzB,UAAW;CADJ,CAnDa,CAAxB;;;AA6DA,2BAA6B,CAC3BL,OAAQ,uBADmB,CAE3BS,MAAO,CACLJ,UAAW,CAAC,kBAAD,CADN,CAFoB,CAM3BG,OAAQ,CACNH,UAAW,CAAC,uBAAD,CADL,CANmB,CAU3Bf,QAAS,CACPe,UAAW,CAAC,2BAAD,CADJ;;AAKPC,WAAY,EALL;;;AAUPX,MAAO,EAVA,CAVkB,CAuB3Be,eAAgB,CACdL,UAAW,CAAC,CAAC,8BAAD,CAAiC,OAAjC,CAAD,CADG,CAvBW,CA2B3BuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BW,CA+B3BU,IAAK,CACHV,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/BsB,CAmC3BwB,cAAe,CACbxB,UAAW;CADE,CAnCY,CAyC3ByB,QAAS,CACPzB,UAAW;CADJ,CAzCkB,CAA7B;;;AAmDA,8BAAgC,CAC9BL,OAAQ,0BADsB,CAE9BS,MAAO,CACLJ,UAAW,CAAC,aAAD,CADN,CAFuB,CAM9BG,OAAQ,CACNH,UAAW,CAAC,mBAAD,CADL,CANsB,CAU9Bf,QAAS,CACPe,UAAW,CAAC,mBAAD,CADJ;;AAKPC,WAAY,CACV,iDAAkD,8CAAA,CAA+C9C,KAA/C,CAAsDN,CAAtD,CAAyD,CACzG,SAAWgF,KAAKnG,KAAL,CAAWyB,MAAMC,IAAN,CAAW,YAAX,CAAX,CAAX,CACA,QAAU0E,KAAKC,OAAL,CAAa,CAAb,EAAgBX,GAA1B,CAEA,SAAWvE,EAAE,SAAF,EAAaO,IAAb,CAAkB,KAAlB,CAAyBgE,GAAzB,CAAX,CACAjE,MAAM+D,WAAN,CAAkBc,IAAlB,EACD,CAPS,CALL;;;AAkBP1C,MAAO,EAlBA,CAVqB,CA+B9Be,eAAgB,CACdL,UAAW,CAAC,CAAC,kCAAD,CAAqC,UAArC,CAAD,CADG,CA/Bc,CAmC9BuB,eAAgB,C
ACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAnCc,CAuC9BU,IAAK,CACHV,UAAW,CAAC,CAAC,wBAAD,CAA2B,OAA3B,CAAD,CADR,CAvCyB,CA2C9BwB,cAAe,CACbxB,UAAW;CADE,CA3Ce,CAiD9ByB,QAAS,CACPzB,UAAW;CADJ,CAjDqB,CAAhC,CAwDA,oBAAsB,CACpBL,OAAQ,YADY,CAGpBC,iBAAkB,CAAC,4BAAD,CAHE,CAKpBQ,MAAO,CACLJ,UAAW,CAAC,IAAD,CADN,CALa,CASpBG,OAAQ,CACNH,UAAW,CAAC,CAAC,qBAAD,CAAwB,OAAxB,CAAD,CADL,CATY,CAapBf,QAAS,CACPe,UAAW,CAAC,kBAAD,CADJ;;AAKPC,WAAY;AAEVgC,OAAQ,eAAA,CAAgB9E,KAAhB,CAAuB,CAC7B,SAAW,kEAAX,CACA,UAAY+E,mBAAmB/E,MAAMC,IAAN,CAAW,gBAAX,CAAnB,CAAZ,CAEA,GAAI+E,KAAKhH,IAAL,CAAUiH,KAAV,CAAJ,CAAsB,CACpB,iBAAmBA,MAAMC,KAAN,CAAYF,IAAZ,CAAnB,CACIG,cAAgBC,iBAAeC,YAAf,CAA6B,CAA7B,CADpB,CAEIvF,EAAIqF,cAAc,CAAd,CAFR,CAGIV,UAAYU,cAAc,CAAd,CAHhB;AAMAnF,MAAMC,IAAN,CAAW,KAAX,CAAkB,iCAAmCwE,SAArD,EACA,YAAczE,MAAMyD,OAAN,CAAc,QAAd,CAAd,CACAC,QAAQC,OAAR,CAAgB3D,MAAMsF,KAAN,EAAhB,EACAtF,MAAMkB,MAAN,GACD,CACF,CAlBS,CALL;;;AA6BPiB,MAAO,EA7BA,CAbW,CA6CpBe,eAAgB,CACdL,UAAW,CAAC,CAAC,gBAAD,CAAmB,UAAnB,CAAD,CADG,CA7CI,CAiDpBuB,eAAgB,CACdvB,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAjDI,CAqDpBU,IAAK,CACHV,UAAW;CADR,CArDe,CA2DpBwB,cAAe,CACbxB,UAAW;CADE,CA3DK,CAiEpByB,QAAS,CACPzB,UAAW;CADJ,CAjEW,CAAtB,CA0EA,qBAAuB,eAAc,CACpC0C,iBAAkBA,gBADkB,CAEpCC,eAAgBA,cAFoB,CAGpCC,mBAAoBA,kBAHgB,CAIpCC,iBAAkBA,gBAJkB,CAKpCC,iBAAkBA,gBALkB,CAMpCC,qBAAsBA,oBANc,CAOpCC,mBAAoBA,kBAPgB,CAQpCC,eAAgBA,cARoB,CASpCC,aAAcA,YATsB,CAUpCC,eAAgBA,cAVoB,CAWpCC,kBAAmBA,iBAXiB,CAYpCC,eAAgBA,cAZoB,CAapCC,sBAAuBA,qBAba,CAcpCC,kBAAmBA,iBAdiB,CAepCC,kBAAmBA,iBAfiB,CAgBpCC,uBAAwBA,sBAhBY,CAiBpCC,0BAA2BA,yBAjBS,CAkBpCC,gBAAiBA,eAlBmB,CAAd,CAAvB,CAqBA,eAAiBC,aAAaC,gBAAb,EAA+BpE,MAA/B,CAAsC,SAAUC,GAAV,CAAeoE,GAAf,CAAoB,CACzE,cAAgBD,iBAAiBC,GAAjB,CAAhB,CACA,kBAAgB,EAAT,CAAapE,GAAb,CAAkBqE,sBAAsBxE,SAAtB,CAAlB,CAAP,CACD,CAHgB,CAGd,EAHc,CAAjB;AAMA,cAAgB,UAAA,CAAW,gCAAX,CAA6C,GAA7C,CAAhB;;AAIA,eAAiB,qBAAjB,CAEA,mBAAqB,CAAC,wCAAD,CAA2C,uCAA3C,CAAoF,qCAApF,CAA2H,oCAA3H,CAArB;AAGA,sBAAwB,CAAC,OAAD,CAAU,QAAV,CAAoB,UAApB,CAAgC,MAAhC,CAAwC,OAAxC,CAAiD,IAAjD,C
AAuD,OAAvD,CAAgE,QAAhE,CAA0E,QAA1E,CAAxB;AAGA,iBAAmB,CAAC,OAAD,CAAU,OAAV,CAAnB,CACA,0BAA4ByE,aAAajL,GAAb,CAAiB,SAAUkL,QAAV,CAAoB,CAC/D,MAAO,IAAMA,QAAN,CAAiB,GAAxB,CACD,CAF2B,CAA5B,CAGA,qBAAuBD,aAAa7J,IAAb,CAAkB,GAAlB,CAAvB,CACA,oBAAsB,CAAC,KAAD,CAAQ,QAAR,CAAkB,MAAlB,CAA0B,OAA1B,CAAmC,IAAnC,CAAyC,KAAzC,CAAtB,CACA,uBAAyB,UAAA,CAAW,KAAO+J,gBAAgB/J,IAAhB,CAAqB,GAArB,CAAP,CAAmC,IAA9C,CAAoD,GAApD,CAAzB;AAGA,sBAAwB,CAAC,GAAD,CAAxB,CACA,2BAA6BgK,kBAAkBpL,GAAlB,CAAsB,SAAUqL,GAAV,CAAe,CAChE,WAAa,QAAb,CACD,CAF4B,EAE1BjK,IAF0B,CAErB,GAFqB,CAA7B;AAKA,6BAA+B,CAAC,IAAD,CAAO,IAAP,CAAa,OAAb,CAAsB,KAAtB,CAA6B,QAA7B,CAAuC,MAAvC,EAA+CA,IAA/C,CAAoD,GAApD,CAA/B;AAGA,gBAAkB,CAAC,IAAD,CAAO,IAAP,CAAa,IAAb,CAAmB,IAAnB,CAAyB,IAAzB,CAAlB,CACA,oBAAsBkK,YAAYlK,IAAZ,CAAiB,GAAjB,CAAtB;;;;;AAQA,kCAAoC,CAAC,UAAD,CAAa,OAAb,CAAsB,QAAtB,CAAgC,SAAhC,CAA2C,SAA3C,CAAsD,KAAtD,CAA6D,gBAA7D,CAA+E,OAA/E,CAAwF,SAAxF,CAAmG,cAAnG,CAAmH,QAAnH,CAA6H,iBAA7H,CAAgJ,OAAhJ,CAAyJ,MAAzJ;AAEpC,QAFoC,CAE1B,QAF0B,CAEhB,QAFgB,CAEN,OAFM;AAGpC,MAHoC,CAG5B,MAH4B,CAGpB,KAHoB,CAGb,UAHa,CAGD,OAHC,CAGQ,YAHR,CAGsB,UAHtB;AAIpC,2BAJoC;AAKpC,OALoC,CAK3B,eAL2B,CAKV,SALU,CAKC,QALD,CAKW,QALX,CAKqB,KALrB,CAK4B,OAL5B,CAKqC,UALrC,CAKiD,SALjD,CAK4D,UAL5D,CAKwE,SALxE,CAKmF,SALnF,CAK8F,OAL9F,CAApC;;;;;;;;;;;AAkBA,kCAAoC,CAAC,KAAD,CAAQ,SAAR,CAAmB,MAAnB,CAA2B,WAA3B,CAAwC,QAAxC,CAAkD,SAAlD,CAA6D,qBAA7D,CAAoF,QAApF;AACpC,OADoC,CAC3B,QAD2B,CACjB,OADiB,CACR,MADQ,CACA,MADA,CACQ,OADR,CACiB,QADjB,CAApC;;;AAMA,wBAA0B,CAAC,GAAD,CAAM,YAAN,CAAoB,IAApB,CAA0B,KAA1B,CAAiC,KAAjC,CAAwC,GAAxC,CAA6C,KAA7C,CAAoD,OAApD,EAA6DA,IAA7D,CAAkE,GAAlE,CAA1B;;;;;;;;;AAoBA,yBAA2B,CAAC,SAAD,CAAY,gBAAZ,CAA8B,iBAA9B,CAAiD,MAAjD,CAAyD,MAAzD,CAAiE,SAAjE,CAA4E,qBAA5E,CAAmG,OAAnG,CAA4G,QAA5G,CAAsH,MAAtH,CAA8H,QAA9H,CAAwI,MAAxI,CAAgJ,YAAhJ,CAA8J,WAA9J,CAA2K,MAA3K,CAAmL,OAAnL,CAA4L,MAA5L,CAAoM,UAApM;AAC3B,SAD2B,CAA3B;AAIA,sBAAwB,UAAA,CAAWmK,qBAAqBnK,IAArB,CAA0B,GAA1B,CAAX,CAA2C,GAA3C,CAAxB;;;;;AASA,yBAA2B,CAAC,OAAD,CAAU,QAAV,CAAoB,QAApB,CAA8B,KAA9B,CAAqC,UAArC,CAAiD,QAAjD,CAA2D,QAA3D,
CAAqE,OAArE,CAA8E,MAA9E,CAAsF,OAAtF,CAA+F,SAA/F,CAA0G,YAA1G,CAAwH,SAAxH,CAAmI,MAAnI,CAA2I,QAA3I,CAAqJ,OAArJ,CAA8J,MAA9J,CAAsK,MAAtK,CAA8K,SAA9K,CAAyL,UAAzL;AAC3B,MAD2B,CACnB,QADmB,CACT,UADS,CACG,MADH,CACW,MADX,CACmB,MADnB,CAC2B,UAD3B;AAE3B,mBAF2B,CAEN,MAFM,CAEE,WAFF,CAEe,MAFf,CAEuB,UAFvB,CAEmC,OAFnC,CAE4C,MAF5C,CAEoD,OAFpD,CAE6D,UAF7D;AAG3B,OAH2B,CAGlB,KAHkB;AAI3B,SAJ2B,CAIhB,SAJgB,CAIL,cAJK;AAK3B,QAL2B,CAKjB,WALiB,CAKJ,OALI,CAKK,UALL,CAKiB,UALjB,CAK6B,MAL7B,CAKqC,SALrC,CAKgD,SALhD,CAK2D,OAL3D,CAKoE,KALpE,CAK2E,SAL3E,CAKsF,MALtF,CAK8F,OAL9F,CAKuG,QALvG,CAA3B;AAOA,sBAAwB,UAAA,CAAWoK,qBAAqBpK,IAArB,CAA0B,GAA1B,CAAX,CAA2C,GAA3C,CAAxB;AAGA,mBAAqB,wCAArB;;;;AAWA,YAAc,UAAA,CAAW,iBAAX,CAA8B,GAA9B,CAAd;;;;;;;;;;;;AAwBA,qBAAuB,CAAC,SAAD,CAAY,OAAZ,CAAqB,YAArB,CAAmC,MAAnC,CAA2C,IAA3C,CAAiD,QAAjD,CAA2D,QAA3D,CAAqE,SAArE,CAAgF,KAAhF,CAAuF,UAAvF,CAAmG,IAAnG,CAAyG,KAAzG,CAAgH,IAAhH,CAAsH,IAAtH,CAA4H,OAA5H,CAAqI,UAArI,CAAiJ,YAAjJ,CAA+J,QAA/J,CAAyK,QAAzK,CAAmL,MAAnL,CAA2L,IAA3L,CAAiM,IAAjM,CAAuM,IAAvM,CAA6M,IAA7M,CAAmN,IAAnN,CAAyN,IAAzN,CAA+N,QAA/N,CAAyO,QAAzO,CAAmP,IAAnP,CAAyP,IAAzP,CAA+P,KAA/P,CAAsQ,QAAtQ,CAAgR,IAAhR,CAAsR,QAAtR,CAAgS,GAAhS,CAAqS,KAArS,CAA4S,UAA5S,CAAwT,SAAxT,CAAmU,OAAnU,CAA4U,OAA5U,CAAqV,UAArV,CAAiW,OAAjW,CAA0W,IAA1W,CAAgX,OAAhX,CAAyX,IAAzX,CAA+X,IAA/X,CAAqY,OAArY,CAAvB,CACA,wBAA0B,UAAA,CAAW,KAAOqK,iBAAiBrK,IAAjB,CAAsB,GAAtB,CAAP,CAAoC,IAA/C,CAAqD,GAArD,CAA1B;;;;AAMA,wBAA0BsK,8BAA8BtK,IAA9B,CAAmC,GAAnC,CAA1B,CACA,yBAA2B,UAAA,CAAWuK,mBAAX,CAAgC,GAAhC,CAA3B,CAEA,wBAA0BC,8BAA8BxK,IAA9B,CAAmC,GAAnC,CAA1B,CACA,yBAA2B,UAAA,CAAWyK,mBAAX,CAAgC,GAAhC,CAA3B,CAEA,gCAAA,CAAiC/H,CAAjC,CAAoC;;;;;;;;;AAUlCA,EAAE,GAAF,EAAOgI,GAAP,CAAW,GAAX,EAAgB7H,IAAhB,CAAqB,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CAC1C,UAAYL,EAAEK,IAAF,CAAZ,CACA,YAAcC,MAAMC,IAAN,CAAW,OAAX,CAAd,CACA,OAASD,MAAMC,IAAN,CAAW,IAAX,CAAT,CACA,GAAI,CAAC0H,EAAD,EAAO,CAACC,OAAZ,CAAqB,OAErB,eAAiB,CAACA,SAAW,EAAZ,EAAkB,GAAlB,EAAyBD,IAAM,EAA/B,CAAjB,CACA,GAAIE,qBAAqB7J,IAArB,CAA0B8J,UAA1B,CAAJ,CAA2C,CACzC,OACD,CAFD,QAEWC,qBAAqB/J,IAA
rB,CAA0B8J,UAA1B,CAAJ,CAA2C,CAChD9H,MAAMkB,MAAN,GACD,CACF,CAZD,EAcA,QAAA,CACD;;;;;;;AAWD,mBAAA,CAAoBxB,CAApB,CAAuB,CACrB,eAAiB,KAAjB,CACAA,EAAE,IAAF,EAAQG,IAAR,CAAa,SAAUc,KAAV,CAAiBqH,OAAjB,CAA0B,CACrC,gBAAkBtI,EAAEsI,OAAF,EAAW3L,IAAX,GAAkBgH,GAAlB,CAAsB,CAAtB,CAAlB,CAEA,GAAI4E,aAAeA,YAAY3E,OAAZ,GAAwB,IAA3C,CAAiD,CAC/C4E,WAAa,IAAb,CACAxI,EAAEsI,OAAF,EAAW9G,MAAX,GACD,CAHD,QAGWgH,UAAJ,CAAgB,CACrBA,WAAa,KAAb;AAEAC,aAAaH,OAAb,CAAsBtI,CAAtB,CAAyB,IAAzB,EACD,CACF,CAXD,EAaA,QAAA,CACD;;;;;;;;;;AAaD,qBAAA,CAAsBK,IAAtB,CAA4BL,CAA5B,CAA+B,CAC7B,OAAS3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,KAA7E,CAEA,UAAY2D,EAAEK,IAAF,CAAZ,CAEA,GAAIqI,EAAJ,CAAQ,CACN,YAAcrI,KAAKsI,WAAnB,CACA,MAAQ3I,EAAE,SAAF,CAAR;;AAIA,MAAO4I,SAAW,EAAEA,QAAQhF,OAAR,EAAmBiF,oBAAoBvK,IAApB,CAAyBsK,QAAQhF,OAAjC,CAArB,CAAlB,CAAmF,CACjF,gBAAkBgF,QAAQD,WAA1B,CACA3I,EAAE4I,OAAF,EAAWE,QAAX,CAAoBC,CAApB,EACAH,QAAUD,WAAV,CACD,CAEDrI,MAAM+D,WAAN,CAAkB0E,CAAlB,EACAzI,MAAMkB,MAAN,GACA,QAAA,CACD,CAED,QAAA,CACD,CAED,oBAAA,CAAqBxB,CAArB,CAAwB,CACtBA,EAAE,KAAF,EAASG,IAAT,CAAc,SAAUc,KAAV,CAAiB+H,GAAjB,CAAsB,CAClC,SAAWhJ,EAAEgJ,GAAF,CAAX,CACA,gBAAkBC,KAAK3G,QAAL,CAAc4G,mBAAd,EAAmC5M,MAAnC,GAA8C,CAAhE,CAEA,GAAI6M,WAAJ,CAAiB,CACfC,cAAcH,IAAd,CAAoBjJ,CAApB,CAAuB,GAAvB,EACD,CACF,CAPD,EASA,QAAA,CACD,CAED,qBAAA,CAAsBA,CAAtB,CAAyB,CACvBA,EAAE,MAAF,EAAUG,IAAV,CAAe,SAAUc,KAAV,CAAiBoI,IAAjB,CAAuB,CACpC,UAAYrJ,EAAEqJ,IAAF,CAAZ,CACA,gBAAkBC,MAAMvF,OAAN,CAAc,QAAd,EAAwBzH,MAAxB,GAAmC,CAArD,CACA,GAAI6M,WAAJ,CAAiB,CACfC,cAAcE,KAAd,CAAqBtJ,CAArB,CAAwB,GAAxB,EACD,CACF,CAND,EAQA,QAAA,CACD;;;;;;;;;;;AAcD,+BAAA,CAAgCA,CAAhC,CAAmC,CACjCA,EAAIuJ,WAAWvJ,CAAX,CAAJ,CACAA,EAAIwJ,YAAYxJ,CAAZ,CAAJ,CACAA,EAAIyJ,aAAazJ,CAAb,CAAJ,CAEA,QAAA,CACD,CAED,sBAAA,CAAuBM,KAAvB,CAA8BN,CAA9B,CAAiC,CAC/B,QAAU3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,GAA9E,CAEA,SAAWiE,MAAMqD,GAAN,CAAU,CAAV,CAAX,CACA,GAAI,CAACtD,IAAL,CAAW,CACT,QAAA,CACD,CAED,eAAiBC,MAAMqD,GAAN,CAAU,CAAV,CAAjB,CACI9C,QAAU6I,
WAAW7I,OADzB,CAGA,iBAAmBD,mBAAiBC,OAAjB,EAA0B3E,GAA1B,CAA8B,SAAU+K,GAAV,CAAe,CAC9D,WAAa,GAAN,CAAYpG,QAAQoG,GAAR,CAAnB,CACD,CAFkB,EAEhB3J,IAFgB,CAEX,GAFW,CAAnB,CAIAgD,MAAM+D,WAAN,CAAkB,IAAMkD,GAAN,CAAY,GAAZ,CAAkBoC,YAAlB,CAAiC,GAAjC,CAAuCrJ,MAAMe,QAAN,EAAvC,CAA0D,IAA1D,CAAiEkG,GAAjE,CAAuE,GAAzF,EACA,QAAA,CACD,CAED,uBAAA,CAAwBpC,IAAxB,CAA8BnF,CAA9B,CAAiC,CAC/B,WAAa4J,SAASzE,KAAK5E,IAAL,CAAU,QAAV,CAAT,CAA8B,EAA9B,CAAb,CACA,UAAYqJ,SAASzE,KAAK5E,IAAL,CAAU,OAAV,CAAT,CAA6B,EAA7B,GAAoC,EAAhD;;;AAKA,GAAI,CAACsJ,QAAU,EAAX,EAAiB,EAAjB,EAAuBpF,MAAQ,EAAnC,CAAuC,CACrCU,KAAK3D,MAAL,GACD,CAFD,QAEWqI,MAAJ,CAAY;;;AAIjB1E,KAAK1E,UAAL,CAAgB,QAAhB,EACD,CAED,QAAA,CACD;;AAID,sBAAA,CAAuB0E,IAAvB,CAA6BnF,CAA7B,CAAgC,CAC9B,GAAI8J,UAAUxL,IAAV,CAAe6G,KAAK5E,IAAL,CAAU,KAAV,CAAf,CAAJ,CAAsC,CACpC4E,KAAK3D,MAAL,GACD,CAED,QAAA,CACD,CAED,oBAAA,CAAqBuI,QAArB,CAA+B/J,CAA/B,CAAkC,CAChC+J,SAAS3I,IAAT,CAAc,KAAd,EAAqBjB,IAArB,CAA0B,SAAUc,KAAV,CAAiBN,GAAjB,CAAsB,CAC9C,SAAWX,EAAEW,GAAF,CAAX,CAEAqJ,eAAe7E,IAAf,CAAqBnF,CAArB,EACAiK,cAAc9E,IAAd,CAAoBnF,CAApB,EACD,CALD,EAOA,QAAA,CACD,CAED,mBAAA,CAAoBkK,OAApB,CAA6BlK,CAA7B,CAAgCvB,GAAhC,CAAqC,CACnC,SAAWpC,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAA/E,CAEA,GAAI8N,KAAK7N,MAAL,GAAgB,CAApB,CAAuB,CACrB6N,KAAOC,cAAP,CACD,CAED,GAAI3L,GAAJ,CAAS,CACP,eAAiBG,MAAIC,KAAJ,CAAUJ,GAAV,CAAjB,CACI4L,SAAWC,WAAWD,QAD1B,CAEIpN,SAAWqN,WAAWrN,QAF1B,CAIAkN,KAAO,GAAGlH,MAAH,CAAUC,mBAAmBiH,IAAnB,CAAV,CAAoC,CAAC,gBAAkBE,QAAlB,CAA6B,IAA7B,CAAoCpN,QAApC,CAA+C,IAAhD,CAApC,CAAP,CACD,CAED+C,EAAEmK,KAAK7M,IAAL,CAAU,GAAV,CAAF,CAAkB4M,OAAlB,EAA2BK,QAA3B,CAAoCC,UAApC,EAEA,QAAA,CACD,CAED,sBAAA,CAAuBN,OAAvB,CAAgClK,CAAhC,CAAmC,CACjC,SAAW3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAA/E,CAEA,GAAI8N,KAAK7N,MAAL,GAAgB,CAApB,CAAuB,CACrB6N,KAAOM,iBAAP,CACD;;AAIDzK,EAAEmK,KAAK7M,IAAL,CAAU,GAAV,CAAF,CAAkB4M,OAAlB,EAA2BlC,GAA3B,CAA+B,IAAMwC,UAArC,EAAiDhJ,MAAjD;AAGAxB,EAAE,IAAMwK,UAAR,CAAoBN,OAApB,EAA6BQ,WAA7B,CAAyCF,UAAzC,EA
EA,QAAA,CACD;;;AAMD,sBAAA,CAAuBN,OAAvB,CAAgClK,CAAhC,CAAmC,CACjC,WAAaA,EAAE,IAAF,CAAQkK,OAAR,CAAb,CAEA,GAAIS,OAAOrO,MAAP,CAAgB,CAApB,CAAuB,CACrBqO,OAAOxK,IAAP,CAAY,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CACjC,SAASA,IAAF,EAAQmB,MAAR,EAAP,CACD,CAFD,EAGD,CAJD,IAIO,CACLmJ,OAAOxK,IAAP,CAAY,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CACjC+I,cAAcpJ,EAAEK,IAAF,CAAd,CAAuBL,CAAvB,CAA0B,IAA1B,EACD,CAFD,EAGD,CAED,QAAA,CACD,CAED,8BAAA,CAA+B+J,QAA/B,CAAyC,CACvCA,SAAS3I,IAAT,CAAc,GAAd,EAAmBjB,IAAnB,CAAwB,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CAC7CA,KAAKQ,OAAL,CAAeD,mBAAiBP,KAAKQ,OAAtB,EAA+B+B,MAA/B,CAAsC,SAAUC,GAAV,CAAetC,IAAf,CAAqB,CACxE,GAAIqK,mBAAmBtM,IAAnB,CAAwBiC,IAAxB,CAAJ,CAAmC,CACjC,kBAAgB,EAAT,CAAasC,GAAb,CAAkBgI,gBAAgB,EAAhB,CAAoBtK,IAApB,CAA0BF,KAAKQ,OAAL,CAAaN,IAAb,CAA1B,CAAlB,CAAP,CACD,CAED,UAAA,CACD,CANc,CAMZ,EANY,CAAf,CAOD,CARD,EAUA,eAAA,CACD;;;;;;AASD,wBAAA,CAAyBwJ,QAAzB,CAAmC;;;AAIjC,6BAA6BA,SAASe,MAAT,GAAkBxO,MAAlB,CAA2ByN,SAASe,MAAT,EAA3B,CAA+Cf,QAArE,CAAP,CACD,CAED,oBAAA,CAAqBA,QAArB,CAA+B/J,CAA/B,CAAkC,CAChC+J,SAAS3I,IAAT,CAAc,GAAd,EAAmBjB,IAAnB,CAAwB,SAAUc,KAAV,CAAiB8H,CAAjB,CAAoB,CAC1C,OAAS/I,EAAE+I,CAAF,CAAT,CACA,GAAIgC,GAAG3J,IAAH,CAAQ,aAAR,EAAuB9E,MAAvB,GAAkC,CAAlC,EAAuCyO,GAAGC,IAAH,GAAUC,IAAV,KAAqB,EAAhE,CAAoEF,GAAGvJ,MAAH,GACrE,CAHD,EAKA,QAAA,CACD;;;;;AAQD,oCAAsC,CAAC,UAAD,CAAa,OAAb,CAAsB,QAAtB,CAAgC,SAAhC,CAA2C,SAA3C,CAAsD,KAAtD,CAA6D,gBAA7D,CAA+E,OAA/E,CAAwF,SAAxF,CAAmG,cAAnG,CAAmH,QAAnH,CAA6H,iBAA7H,CAAgJ,OAAhJ,CAAyJ,MAAzJ,CAAiK,MAAjK,CAAyK,QAAzK,CAAmL,QAAnL,CAA6L,QAA7L,CAAuM,OAAvM;AACtC,MADsC,CAC9B,MAD8B,CACtB,KADsB,CACf,OADe,CACN,YADM,CACQ,UADR;AAEtC,2BAFsC;AAGtC,OAHsC,CAG7B,eAH6B,CAGZ,SAHY,CAGD,QAHC,CAGS,QAHT,CAGmB,KAHnB,CAG0B,OAH1B,CAGmC,UAHnC,CAG+C,SAH/C,CAG0D,UAH1D,CAGsE,SAHtE,CAGiF,OAHjF,CAAtC;;;;;;;;;;;AAgBA,oCAAsC,CAAC,KAAD,CAAQ,SAAR,CAAmB,MAAnB,CAA2B,WAA3B,CAAwC,QAAxC,CAAkD,SAAlD,CAA6D,qBAA7D,CAAoF,QAApF;AACtC,OADsC,CAC7B,QAD6B,CACnB,OADmB,CACV,MADU,CACF,MADE,CACM,OADN,CACe,QADf,CAAtC;;;AAMA,0BAA4B,CAAC,GAAD,CAAM,YAAN,CAAoB,IAApB,CAA0B,KAA1B,CAAiC,KAAjC,CAAwC,GAAxC,CAA
6C,KAA7C,CAAoD,OAApD,EAA6DlE,IAA7D,CAAkE,GAAlE,CAA5B;;AAIA,6BAA+B,CAAC,IAAD,CAAO,GAAP,CAAY,GAAZ,CAAiB,OAAjB,CAA0B,IAA1B,CAAgC,MAAhC,CAAwC,MAAxC,CAAgD,UAAhD,CAA4D,OAA5D,CAAqE,KAArE,CAA4E,MAA5E,CAAoF,MAApF,CAA/B,CAEA,gCAAkC,UAAA,CAAW,KAAO4N,yBAAyB5N,IAAzB,CAA8B,GAA9B,CAAP,CAA4C,IAAvD,CAA6D,GAA7D,CAAlC;;;AAKA,8BAAgC,CAAC,CAAC,SAAD,CAAY,gBAAZ,CAAD,CAAgC,CAAC,OAAD,CAAU,gBAAV,CAAhC,CAA6D,CAAC,QAAD,CAAW,gBAAX,CAA7D,CAA2F,CAAC,OAAD,CAAU,WAAV,CAA3F,CAAmH,CAAC,OAAD,CAAU,YAAV,CAAnH,CAA4I,CAAC,OAAD,CAAU,YAAV,CAA5I,CAAhC,CAEA,kBAAoB,CAAC,QAAD,CAAW,OAAX,CAAoB,OAApB,CAA6B,SAA7B,CAApB,CACA,qBAAuB,UAAA,CAAW6N,cAAc7N,IAAd,CAAmB,GAAnB,CAAX,CAAoC,GAApC,CAAvB;;;;AAMA,2BAA6B,CAAC,SAAD,CAAY,gBAAZ,CAA8B,iBAA9B,CAAiD,MAAjD,CAAyD,MAAzD,CAAiE,SAAjE,CAA4E,qBAA5E,CAAmG,OAAnG,CAA4G,QAA5G,CAAsH,MAAtH,CAA8H,QAA9H,CAAwI,MAAxI,CAAgJ,YAAhJ,CAA8J,WAA9J,CAA2K,MAA3K,CAAmL,OAAnL,CAA4L,MAA5L,CAAoM,UAApM;AAC7B,SAD6B,CAA7B;AAIA,wBAA0B,UAAA,CAAW8N,uBAAuB9N,IAAvB,CAA4B,GAA5B,CAAX,CAA6C,GAA7C,CAA1B;AAGA,wBAA0B,UAAA,CAAW,qBAAX,CAAkC,GAAlC,CAA1B;;;;AAMA,2BAA6B,CAAC,OAAD,CAAU,QAAV,CAAoB,QAApB,CAA8B,KAA9B,CAAqC,UAArC,CAAiD,QAAjD,CAA2D,QAA3D,CAAqE,OAArE,CAA8E,MAA9E,CAAsF,OAAtF,CAA+F,SAA/F,CAA0G,YAA1G,CAAwH,SAAxH,CAAmI,MAAnI,CAA2I,QAA3I,CAAqJ,OAArJ,CAA8J,MAA9J,CAAsK,MAAtK,CAA8K,SAA9K,CAAyL,UAAzL;AAC7B,MAD6B,CACrB,QADqB,CACX,UADW,CACC,MADD,CACS,MADT,CACiB,MADjB,CACyB,UADzB;AAE7B,mBAF6B,CAER,MAFQ,CAEA,WAFA,CAEa,MAFb,CAEqB,UAFrB,CAEiC,OAFjC,CAE0C,MAF1C,CAEkD,OAFlD,CAE2D,UAF3D;AAG7B,OAH6B,CAGpB,KAHoB;AAI7B,SAJ6B,CAIlB,SAJkB,CAIP,cAJO;AAK7B,QAL6B,CAKnB,WALmB,CAKN,OALM,CAKG,UALH,CAKe,UALf,CAK2B,MAL3B,CAKmC,SALnC,CAK8C,SAL9C,CAKyD,OALzD,CAKkE,KALlE,CAKyE,SALzE,CAKoF,MALpF,CAK4F,OAL5F,CAKqG,QALrG,CAA7B;AAOA,wBAA0B,UAAA,CAAW+N,uBAAuB/N,IAAvB,CAA4B,GAA5B,CAAX,CAA6C,GAA7C,CAA1B;;;;;;;;;AAoBA,0BAA4BgO,gCAAgChO,IAAhC,CAAqC,GAArC,CAA5B,CAGA,0BAA4BiO,gCAAgCjO,IAAhC,CAAqC,GAArC,CAA5B,CAKA,2BAA6B,UAAA,CAAW,mBAAX,CAAgC,GAAhC,CAA7B,CACA,yBAA2B,UAAA,CAAW,4BAAX,CAAyC,GAAzC,CAA3B,CACA,eAAiB,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAjB;AAGA,kBAAA,CA
AmB+C,IAAnB,CAAyB,CACvB,YAAcA,KAAKE,IAAL,CAAU,OAAV,CAAd,CACA,OAASF,KAAKE,IAAL,CAAU,IAAV,CAAT,CACA,UAAY,CAAZ,CAEA,GAAI0H,EAAJ,CAAQ;AAEN,GAAIuD,oBAAoBlN,IAApB,CAAyB2J,EAAzB,CAAJ,CAAkC,CAChCwD,OAAS,EAAT,CACD,CACD,GAAIC,oBAAoBpN,IAApB,CAAyB2J,EAAzB,CAAJ,CAAkC,CAChCwD,OAAS,EAAT,CACD,CACF,CAED,GAAIvD,OAAJ,CAAa,CACX,GAAIuD,QAAU,CAAd,CAAiB;;AAGf,GAAID,oBAAoBlN,IAApB,CAAyB4J,OAAzB,CAAJ,CAAuC,CACrCuD,OAAS,EAAT,CACD,CACD,GAAIC,oBAAoBpN,IAApB,CAAyB4J,OAAzB,CAAJ,CAAuC,CACrCuD,OAAS,EAAT,CACD,CACF;;;AAKD,GAAIE,iBAAiBrN,IAAjB,CAAsB4J,OAAtB,CAAJ,CAAoC,CAClCuD,OAAS,EAAT,CACD;;;;AAMD,GAAIG,oBAAoBtN,IAApB,CAAyB4J,OAAzB,CAAJ,CAAuC,CACrCuD,OAAS,EAAT,CACD,CACF,CAED,YAAA,CACD;;;AAKD,iBAAA,CAAkBnL,KAAlB,CAAyB,CACvB,kBAAkBA,MAAMC,IAAN,CAAW,OAAX,CAAX,GAAmC,IAA1C,CACD;AAGD,oBAAA,CAAqByK,IAArB,CAA2B,CACzB,MAAO,CAACA,KAAKxF,KAAL,CAAW,IAAX,GAAoB,EAArB,EAAyBlJ,MAAhC,CACD,CAED,UAAY,UAAA,CAAW,WAAX,CAAwB,GAAxB,CAAZ,CAEA,oBAAA,CAAqBuP,UAArB,CAAiC,CAC/B,YAAcxP,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,GAAlF,CAEA,WAAawP,WAAa,EAA1B,CAEA,GAAIC,OAAS,CAAb,CAAgB,CACd,gBAAkB,MAAlB;;;;;AAOA,GAAIC,MAAMzN,IAAN,CAAWsF,OAAX,CAAJ,CAAyB,CACvBoI,YAAcF,OAAS,CAAvB,CACD,CAFD,IAEO,CACLE,YAAcF,OAAS,IAAvB,CACD,CAED,YAAYG,GAAL,CAASC,KAAKC,GAAL,CAASH,WAAT,CAAsB,CAAtB,CAAT,CAAmC,CAAnC,CAAP,CACD,CAED,QAAA,CACD;;AAID,0BAAA,CAA2B3L,IAA3B,CAAiC,CAC/B,UAAY,CAAZ,CACA,SAAWA,KAAK2K,IAAL,GAAYC,IAAZ,EAAX,CACA,eAAiBD,KAAK1O,MAAtB;AAGA,GAAIuP,WAAa,EAAjB,CAAqB,CACnB,QAAA,CACD;AAGDJ,OAASW,YAAYpB,IAAZ,CAAT;;AAIAS,OAASY,YAAYR,UAAZ,CAAT;;;;AAMA,GAAIb,KAAKsB,KAAL,CAAW,CAAC,CAAZ,IAAmB,GAAvB,CAA4B,CAC1Bb,OAAS,CAAT,CACD,CAED,YAAA,CACD,CAED,iBAAA,CAAkBnL,KAAlB,CAAyBN,CAAzB,CAA4ByL,KAA5B,CAAmC,CACjCnL,MAAMC,IAAN,CAAW,OAAX,CAAoBkL,KAApB,EACA,YAAA,CACD,CAED,oBAAA,CAAqBnL,KAArB,CAA4BN,CAA5B,CAA+BuM,MAA/B,CAAuC,CACrC,GAAI,CACF,UAAYC,kBAAkBlM,KAAlB,CAAyBN,CAAzB,EAA8BuM,MAA1C,CACAE,SAASnM,KAAT,CAAgBN,CAAhB,CAAmByL,KAAnB,EACD,CAAC,MAAOiB,CAAP,CAAU;CAIZ,YAAA,CACD;AAGD,uBAAA,CAAwBrM,IAAxB,CAA8BL,CAA9B,CAAiCyL,KAAjC,CA
AwC,CACtC,WAAapL,KAAKyK,MAAL,EAAb,CACA,GAAIA,MAAJ,CAAY,CACV6B,YAAY7B,MAAZ,CAAoB9K,CAApB,CAAuByL,MAAQ,IAA/B,EACD,CAED,WAAA,CACD;;;AAKD,0BAAA,CAA2BnL,KAA3B,CAAkCN,CAAlC,CAAqC,CACnC,gBAAkB3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,IAAtF,CAEA,UAAYuQ,SAAStM,KAAT,CAAZ,CAEA,GAAImL,KAAJ,CAAW,CACT,YAAA,CACD,CAEDA,MAAQoB,aAAavM,KAAb,CAAR,CAEA,GAAIwM,WAAJ,CAAiB,CACfrB,OAASsB,UAAUzM,KAAV,CAAT,CACD,CAED0M,eAAe1M,KAAf,CAAsBN,CAAtB,CAAyByL,KAAzB,EAEA,YAAA,CACD;;AAID,qBAAA,CAAsBnL,KAAtB,CAA6B,CAC3B,eAAiBA,MAAMqD,GAAN,CAAU,CAAV,CAAjB,CACIC,QAAU8F,WAAW9F,OADzB;;;AAQA,GAAIqJ,uBAAuB3O,IAAvB,CAA4BsF,OAA5B,CAAJ,CAA0C,CACxC,yBAAyBtD,KAAlB,CAAP,CACD,CAFD,QAEWsD,UAAY,KAAhB,CAAuB,CAC5B,QAAA,CACD,CAFM,QAEIsJ,qBAAqB5O,IAArB,CAA0BsF,OAA1B,CAAJ,CAAwC,CAC7C,QAAA,CACD,CAFM,QAEIuJ,WAAW7O,IAAX,CAAgBsF,OAAhB,CAAJ,CAA8B,CACnC,MAAO,CAAC,CAAR,CACD,CAFM,QAEIA,UAAY,IAAhB,CAAsB,CAC3B,MAAO,CAAC,CAAR,CACD,CAED,QAAA,CACD,CAED,uBAAA,CAAwBtD,KAAxB,CAA+BN,CAA/B,CAAkC,CAChC,GAAIM,MAAMqD,GAAN,CAAU,CAAV,CAAJ,CAAkB,CAChB,eAAiBrD,MAAMqD,GAAN,CAAU,CAAV,CAAjB,CACIC,QAAU8F,WAAW9F,OADzB,CAGA,GAAIA,UAAY,MAAhB,CAAwB;AAEtBwF,cAAc9I,KAAd,CAAqBN,CAArB,CAAwB,KAAxB,EACD,CACF,CACF,CAED,mBAAA,CAAoBM,KAApB,CAA2BN,CAA3B,CAA8ByL,KAA9B,CAAqC,CACnC,GAAInL,KAAJ,CAAW,CACT8M,eAAe9M,KAAf,CAAsBN,CAAtB,EACA2M,YAAYrM,KAAZ,CAAmBN,CAAnB,CAAsByL,KAAtB,EACD,CACF,CAED,gBAAA,CAAiBzL,CAAjB,CAAoB8M,WAApB,CAAiC,CAC/B9M,EAAE,QAAF,EAAYgI,GAAZ,CAAgB,SAAhB,EAA2B7H,IAA3B,CAAgC,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB;;AAGrD,UAAYL,EAAEK,IAAF,CAAZ,CACAC,MAAQmM,SAASnM,KAAT,CAAgBN,CAAhB,CAAmBwM,kBAAkBlM,KAAlB,CAAyBN,CAAzB,CAA4B8M,WAA5B,CAAnB,CAAR,CAEA,YAAcxM,MAAMwK,MAAN,EAAd,CACA,aAAe+B,aAAavM,KAAb,CAAf,CAEA+M,WAAWrJ,OAAX,CAAoBhE,CAApB,CAAuBsN,QAAvB,CAAiCR,WAAjC,EACA,GAAI9I,OAAJ,CAAa;;AAGXqJ,WAAWrJ,QAAQ8G,MAAR,EAAX,CAA6B9K,CAA7B,CAAgCsN,SAAW,CAA3C,CAA8CR,WAA9C,EACD,CACF,CAfD,EAiBA,QAAA,CACD;;AAID,wBAAA,CAAyB9M,CAAzB,CAA4B,CAC1B,gBAAkB3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,IAAtF;;AAIA
kR,0BAA0BzM,OAA1B,CAAkC,SAAU9D,IAAV,CAAgB,CAChD,UAAY0I,iBAAe1I,IAAf,CAAqB,CAArB,CAAZ,CACIwQ,eAAiBC,MAAM,CAAN,CADrB,CAEIC,cAAgBD,MAAM,CAAN,CAFpB,CAIAzN,EAAEwN,eAAiB,GAAjB,CAAuBE,aAAzB,EAAwCvN,IAAxC,CAA6C,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CAClEsM,YAAY3M,EAAEK,IAAF,EAAQyK,MAAR,CAAe0C,cAAf,CAAZ,CAA4CxN,CAA5C,CAA+C,EAA/C,EACD,CAFD,EAGD,CARD;;;;;AAeA2N,QAAQ3N,CAAR,CAAW8M,WAAX,EACAa,QAAQ3N,CAAR,CAAW8M,WAAX,EAEA,QAAA,CACD,CAED,iBAAmB,SAAnB,CAEA,wBAAA,CAAyB9B,IAAzB,CAA+B,CAC7B,YAAYxG,OAAL,CAAaoJ,YAAb,CAA2B,GAA3B,EAAgC3C,IAAhC,EAAP,CACD;;;;;AAOD,uBAAA,CAAwBxM,GAAxB,CAA6BoP,SAA7B,CAAwC,CACtC,YAAcA,UAAUzM,IAAV,CAAe,SAAU0M,EAAV,CAAc,CACzC,UAAUxP,IAAH,CAAQG,GAAR,CAAP,CACD,CAFa,CAAd,CAGA,GAAIsP,OAAJ,CAAa,CACX,eAAeC,IAAR,CAAavP,GAAb,EAAkB,CAAlB,CAAP,CACD,CAED,WAAA,CACD;;;;;;;;;;;;;;;;AAkBD,oBAAsB,UAAA,CAAW,0EAAX,CAAuF,GAAvF,CAAtB,CAEA,iBAAmB,QAAnB,CAEA,gBAAkB,WAAlB,CACA,gBAAkB,WAAlB,CAEA,uBAAA,CAAwBA,GAAxB,CAA6B,CAC3B,YAAcA,IAAI+G,KAAJ,CAAUyI,eAAV,CAAd,CACA,GAAI,CAACC,OAAL,CAAc,WAAA,CAEd,YAActE,SAASsE,QAAQ,CAAR,CAAT,CAAqB,EAArB,CAAd;;AAIA,eAAiB,GAAV,CAAgBC,OAAhB,CAA0B,IAAjC,CACD,CAED,qBAAA,CAAsB1P,GAAtB,CAA2B,CACzB,WAAWqG,KAAJ,CAAU,GAAV,EAAe,CAAf,EAAkBN,OAAlB,CAA0B,KAA1B,CAAiC,EAAjC,CAAP,CACD,CAED,sBAAA,CAAuB4J,OAAvB,CAAgCnN,KAAhC,CAAuCoN,sBAAvC,CAA+D,CAC7D,gBAAkB,IAAlB;;AAIA,GAAIpN,MAAQ,CAAR,EAAaqN,YAAYhQ,IAAZ,CAAiB8P,OAAjB,CAAb,EAA0CA,QAAQ9R,MAAR,CAAiB,CAA/D,CAAkE,CAChEiS,YAAc,IAAd,CACD;;AAID,GAAItN,QAAU,CAAV,EAAemN,QAAQI,WAAR,KAA0B,OAA7C,CAAsD,CACpDD,YAAc,KAAd,CACD;;AAID,GAAItN,MAAQ,CAAR,EAAamN,QAAQ9R,MAAR,CAAiB,CAA9B,EAAmC,CAAC+R,sBAAxC,CAAgE,CAC9DE,YAAc,KAAd,CACD,CAED,kBAAA,CACD;;;AAKD,uBAAA,CAAwB9P,GAAxB,CAA6BgQ,MAA7B,CAAqC,CACnC,cAAgBA,QAAU7P,MAAIC,KAAJ,CAAUJ,GAAV,CAA1B,CACA,aAAeC,UAAU2L,QAAzB,CACIqE,KAAOhQ,UAAUgQ,IADrB,CAEIC,KAAOjQ,UAAUiQ,IAFrB,CAKA,2BAA6B,KAA7B,CACA,oBAAsBA,KAAK7J,KAAL,CAAW,GAAX,EAAgB8J,OAAhB,GAA0BhM,MAA1B,CAAiC,SAAUC,GAAV,CAAegM,UAAf,CAA2B5N,KAA3B,CAAkC,CACvF,YAAc4N,UAAd;AAGA,GAAIT,QAAQnM,QAAR,CAAiB,GAAjB,CAAJ,CAA2B,CACzB,mBAAqBmM,QAAQtJ,KAAR,CAAc,GAAd,CAArB,
CACIgK,gBAAkBpJ,iBAAeqJ,cAAf,CAA+B,CAA/B,CADtB,CAEIC,gBAAkBF,gBAAgB,CAAhB,CAFtB,CAGIG,QAAUH,gBAAgB,CAAhB,CAHd,CAKA,GAAII,YAAY5Q,IAAZ,CAAiB2Q,OAAjB,CAAJ,CAA+B,CAC7Bb,QAAUY,eAAV,CACD,CACF;;AAID,GAAIf,gBAAgB3P,IAAhB,CAAqB8P,OAArB,GAAiCnN,MAAQ,CAA7C,CAAgD,CAC9CmN,QAAUA,QAAQ5J,OAAR,CAAgByJ,eAAhB,CAAiC,EAAjC,CAAV,CACD;;;;AAMD,GAAIhN,QAAU,CAAd,CAAiB,CACfoN,uBAAyBc,aAAa7Q,IAAb,CAAkB8P,OAAlB,CAAzB,CACD;AAGD,GAAIgB,cAAchB,OAAd,CAAuBnN,KAAvB,CAA8BoN,sBAA9B,CAAJ,CAA2D,CACzDxL,IAAIwM,IAAJ,CAASjB,OAAT,EACD,CAED,UAAA,CACD,CAnCqB,CAmCnB,EAnCmB,CAAtB,CAqCA,gBAAkB,IAAX,CAAkBM,IAAlB,CAAyBY,gBAAgBV,OAAhB,GAA0BtR,IAA1B,CAA+B,GAA/B,CAAhC,CACD;;AAID,oBAAsB,UAAA,CAAW,QAAX,CAAtB,CACA,uBAAA,CAAwB0N,IAAxB,CAA8B,CAC5B,uBAAuB1M,IAAhB,CAAqB0M,IAArB,CAAP,CACD,CAED,uBAAA,CAAwB5I,OAAxB,CAAiC,CACnB,UAAY/F,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAAhF,CAEA,eAAe4O,IAAR,GAAenG,KAAf,CAAqB,KAArB,EAA4BwH,KAA5B,CAAkC,CAAlC,CAAqCiD,KAArC,EAA4CjS,IAA5C,CAAiD,GAAjD,CAAP,CACb;;;;;AAOD,sBAAA,CAAuBkS,UAAvB,CAAmCC,QAAnC,CAA6CzP,CAA7C,CAAgD,CAC9C,GAAI,CAACwP,WAAW1E,MAAX,GAAoBxO,MAAzB,CAAiC,CAC/B,iBAAA,CACD,CAED,0BAA4B4P,KAAKC,GAAL,CAAS,EAAT,CAAasD,SAAW,IAAxB,CAA5B,CACA,gBAAkBzP,EAAE,aAAF,CAAlB,CAEAwP,WAAW1E,MAAX,GAAoBxI,QAApB,GAA+BnC,IAA/B,CAAoC,SAAUc,KAAV,CAAiB2H,OAAjB,CAA0B,CAC5D,aAAe5I,EAAE4I,OAAF,CAAf;AAEA,GAAI8G,4BAA4BpR,IAA5B,CAAiCsK,QAAQhF,OAAzC,CAAJ,CAAuD,CACrD,WAAA,CACD,CAED,iBAAmBgJ,SAAS+C,QAAT,CAAnB,CACA,GAAIC,YAAJ,CAAkB,CAChB,GAAID,WAAaH,UAAjB,CAA6B,CAC3BK,YAAY1L,MAAZ,CAAmBwL,QAAnB,EACD,CAFD,IAEO,CACL,iBAAmB,CAAnB,CACA,YAAcG,YAAYH,QAAZ,CAAd;;AAIA,GAAII,QAAU,IAAd,CAAoB,CAClBC,cAAgB,EAAhB,CACD;;AAID,GAAID,SAAW,GAAf,CAAoB,CAClBC,cAAgB,EAAhB,CACD;;AAID,GAAIL,SAASpP,IAAT,CAAc,OAAd,IAA2BiP,WAAWjP,IAAX,CAAgB,OAAhB,CAA/B,CAAyD,CACvDyP,cAAgBP,SAAW,GAA3B,CACD,CAED,aAAeG,aAAeI,YAA9B,CAEA,GAAIC,UAAYC,qBAAhB,CAAuC,CACrC,mBAAmB/L,MAAZ,CAAmBwL,QAAnB,CAAP,CACD,CAFD,QAEW/G,QAAQhF,OAAR,GAAoB,GAAxB,CAA6B,CAClC,mBAAqB+L,SAAS3E,IAAT,EAArB,CACA,yBAA2Ba,WAAWsE,cAAX,CAA3B,CAEA,GAAIC,q
BAAuB,EAAvB,EAA6BL,QAAU,IAA3C,CAAiD,CAC/C,mBAAmB5L,MAAZ,CAAmBwL,QAAnB,CAAP,CACD,CAFD,QAEWS,sBAAwB,EAAxB,EAA8BL,UAAY,CAA1C,EAA+CM,eAAeF,cAAf,CAAnD,CAAmF,CACxF,mBAAmBhM,MAAZ,CAAmBwL,QAAnB,CAAP,CACD,CACF,CACF,CACF,CAED,WAAA,CACD,CAnDD,EAqDA,kBAAA,CACD;;AAID,4BAAA,CAA6B3P,CAA7B,CAAgC,CAC9B,eAAiB,MAAjB,CACA,aAAe,CAAf,CAEAA,EAAE,SAAF,EAAaG,IAAb,CAAkB,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB;AAEvC,GAAIqP,4BAA4BpR,IAA5B,CAAiC+B,KAAKuD,OAAtC,CAAJ,CAAoD,CAClD,OACD,CAED,UAAY5D,EAAEK,IAAF,CAAZ,CACA,UAAYuM,SAAStM,KAAT,CAAZ,CAEA,GAAImL,MAAQgE,QAAZ,CAAsB,CACpBA,SAAWhE,KAAX,CACA+D,WAAalP,KAAb,CACD,CACF,CAbD;;AAiBA,GAAI,CAACkP,UAAL,CAAiB,CACf,SAAS,MAAF,GAAaxP,EAAE,GAAF,EAAOsQ,KAAP,EAApB,CACD,CAEDd,WAAae,cAAcf,UAAd,CAA0BC,QAA1B,CAAoCzP,CAApC,CAAb,CAEA,iBAAA,CACD;AAID,4BAAA,CAA6BM,KAA7B,CAAoCN,CAApC,CAAuCwQ,MAAvC,CAA+C;;;;AAK7C,GAAIlQ,MAAMmQ,QAAN,CAAe,qBAAf,CAAJ,CAA2C,CACzC,OACD,CAED,YAAcC,gBAAgBpQ,MAAM0K,IAAN,EAAhB,CAAd,CAEA,GAAIoB,YAAYhK,OAAZ,EAAuB,EAA3B,CAA+B,CAC7B,WAAapC,EAAE,GAAF,CAAOM,KAAP,EAAchE,MAA3B,CACA,eAAiB0D,EAAE,OAAF,CAAWM,KAAX,EAAkBhE,MAAnC;AAGA,GAAIqU,WAAaC,OAAS,CAA1B,CAA6B,CAC3BtQ,MAAMkB,MAAN,GACA,OACD,CAED,kBAAoBY,QAAQ9F,MAA5B,CACA,aAAe0D,EAAE,KAAF,CAASM,KAAT,EAAgBhE,MAA/B;;AAIA,GAAI8B,cAAgB,EAAhB,EAAsByS,WAAa,CAAvC,CAA0C,CACxCvQ,MAAMkB,MAAN,GACA,OACD,CAED,YAAcsO,YAAYxP,KAAZ,CAAd;;;AAKA,GAAIkQ,OAAS,EAAT,EAAeT,QAAU,GAAzB,EAAgC3R,cAAgB,EAApD,CAAwD,CACtDkC,MAAMkB,MAAN,GACA,OACD;;AAID,GAAIgP,QAAU,EAAV,EAAgBT,QAAU,GAA9B,CAAmC;;;AAIjC,YAAczP,MAAMqD,GAAN,CAAU,CAAV,EAAaC,OAA3B,CACA,eAAiBA,UAAY,IAAZ,EAAoBA,UAAY,IAAjD,CACA,GAAIkN,UAAJ,CAAgB,CACd,iBAAmBxQ,MAAM5D,IAAN,EAAnB,CACA,GAAIqU,cAAgBL,gBAAgBK,aAAa/F,IAAb,EAAhB,EAAqCsB,KAArC,CAA2C,CAAC,CAA5C,IAAmD,GAAvE,CAA4E,CAC1E,OACD,CACF,CAEDhM,MAAMkB,MAAN,GACA,OACD,CAED,gBAAkBxB,EAAE,QAAF,CAAYM,KAAZ,EAAmBhE,MAArC;AAGA,GAAI0U,YAAc,CAAd,EAAmB5S,cAAgB,GAAvC,CAA4C,CAC1CkC,MAAMkB,MAAN,GACA,OACD,CACF,CACF;;;;;;;AASD,qBAAA,CAAsBuI,QAAtB,CAAgC/J,CAAhC,CAAmC,CACjCA,EAAEiR,wBAAF,CAA4BlH,QAA5B,EAAsC5J,IAAtC,CAA2C,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CAChE,UAAYL,
EAAEK,IAAF,CAAZ,CACA,WAAauM,SAAStM,KAAT,CAAb,CACA,GAAI,CAACkQ,MAAL,CAAa,CACXA,OAAShE,kBAAkBlM,KAAlB,CAAyBN,CAAzB,CAAT,CACAyM,SAASnM,KAAT,CAAgBN,CAAhB,CAAmBwQ,MAAnB,EACD;AAGD,GAAIA,OAAS,CAAb,CAAgB,CACdlQ,MAAMkB,MAAN,GACD,CAFD,IAEO;AAEL0P,oBAAoB5Q,KAApB,CAA2BN,CAA3B,CAA8BwQ,MAA9B,EACD,CACF,CAfD,EAiBA,QAAA,CACD,CAED,qBAAA,CAAsBzG,QAAtB,CAAgC/J,CAAhC,CAAmC,CACjC,UAAY3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAAhF,CAEA2D,EAAEmR,eAAF,CAAmBpH,QAAnB,EAA6B5J,IAA7B,CAAkC,SAAUc,KAAV,CAAiBmQ,MAAjB,CAAyB,CACzD,YAAcpR,EAAEoR,MAAF,CAAd;;;;AAKA,GAAIpR,EAAEqR,OAAF,CAAWtH,QAAX,EAAqBuH,OAArB,CAA6B,GAA7B,EAAkChV,MAAlC,GAA6C,CAAjD,CAAoD,CAClD,eAAekF,MAAR,EAAP,CACD;AAGD,GAAIkP,gBAAgB1Q,EAAEoR,MAAF,EAAUpG,IAAV,EAAhB,IAAsCzH,KAA1C,CAAiD,CAC/C,eAAe/B,MAAR,EAAP,CACD;;AAID,GAAIuL,UAAU/M,EAAEoR,MAAF,CAAV,EAAuB,CAA3B,CAA8B,CAC5B,eAAe5P,MAAR,EAAP,CACD,CAED,cAAA,CACD,CAtBD,EAwBA,QAAA,CACD;;AAKD,2BAAA,CAA4B0I,OAA5B,CAAqClK,CAArC,CAAwC;;;AAItCA,EAAIoJ,cAAcpJ,EAAE,MAAF,CAAd,CAAyBA,CAAzB,CAA4B,KAA5B,CAAJ,CACAA,EAAIoJ,cAAcpJ,EAAE,MAAF,CAAd,CAAyBA,CAAzB,CAA4B,KAA5B,CAAJ,CAEA,QAAA,CACD,CAED,mBAAA,CAAoBA,CAApB,CAAuBuR,OAAvB,CAAgChR,IAAhC,CAAsCiR,QAAtC,CAAgD,CAC9CxR,EAAE,IAAMO,IAAN,CAAa,GAAf,CAAoBiR,QAApB,EAA8BrR,IAA9B,CAAmC,SAAUC,CAAV,CAAaC,IAAb,CAAmB,CACpD,QAAUA,KAAKQ,OAAL,CAAaN,IAAb,CAAV,CACA,gBAAkB3B,MAAIpB,OAAJ,CAAY+T,OAAZ,CAAqB9S,GAArB,CAAlB,CAEA4B,KAAKQ,OAAL,CAAaN,IAAb,EAAqBkR,WAArB,CACD,CALD,EAMD,CAED,0BAAA,CAA2BD,QAA3B,CAAqCxR,CAArC,CAAwCvB,GAAxC,CAA6C,CAC3C,CAAC,MAAD,CAAS,KAAT,EAAgBqC,OAAhB,CAAwB,SAAUP,IAAV,CAAgB,CACtC,kBAAkBP,CAAX,CAAcvB,GAAd,CAAmB8B,IAAnB,CAAyBiR,QAAzB,CAAP,CACD,CAFD,EAIA,eAAA,CACD,CAED,mBAAA,CAAoBxG,IAApB,CAA0B,CACxB,YAAYC,IAAL,GAAYzG,OAAZ,CAAoB,MAApB,CAA4B,GAA5B,EAAiClI,MAAxC,CACD;;;AAKD,oBAAA,CAAqBgE,KAArB,CAA4B,CAC1B,oBAAsBuL,WAAWvL,MAAM0K,IAAN,EAAX,CAAtB,CAEA,aAAe1K,MAAMc,IAAN,CAAW,GAAX,EAAgB4J,IAAhB,EAAf,CACA,eAAiBa,WAAW6F,QAAX,CAAjB,CAEA,GAAIC,gBAAkB,CAAtB,CAAyB,CACvB,kBAAoBA,eAApB,CACD,CAFD,QAEWA,kBAAoB,CAApB,EAAyBC,WAAa,CAA1C
,CAA6C,CAClD,QAAA,CACD,CAED,QAAA,CACD;;AAKD,2BAAA,CAA4B5R,CAA5B,CAA+B6R,SAA/B,CAA0CC,WAA1C,CAAuD,CACrD,iBAAmBzV,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,IAAvF,CAEA,eAAiBwV,UAAUvQ,MAAV,CAAiB,SAAUyQ,IAAV,CAAgB,CAChD,mBAAmBC,OAAZ,CAAoBD,IAApB,IAA8B,CAAC,CAAtC,CACD,CAFgB,CAAjB,CAIA,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBxV,SAArB,CAEA,GAAI,CACF,UAAY,cAAA,EAAiB,CAC3B,SAAW0V,MAAMzR,KAAjB,CAEA,SAAW,MAAX,CACA,UAAY,OAAZ,CAEA,UAAYR,EAAE,QAAUkB,IAAV,CAAiB,IAAjB,CAAwB6Q,IAAxB,CAA+B,IAAjC,CAAZ;;;AAKA,WAAaG,MAAMhW,GAAN,CAAU,SAAU+E,KAAV,CAAiBZ,IAAjB,CAAuB,CAC5C,SAASA,IAAF,EAAQE,IAAR,CAAaC,KAAb,CAAP,CACD,CAFY,EAEV2R,OAFU,GAEA7Q,MAFA,CAEO,SAAU0J,IAAV,CAAgB,CAClC,cAAgB,EAAhB,CACD,CAJY,CAAb;;;;AAUA,GAAIoH,OAAO9V,MAAP,GAAkB,CAAtB,CAAyB,CACvB,cAAgB,MAAhB;;AAGA,GAAI+V,YAAJ,CAAkB,CAChBC,UAAYC,UAAUH,OAAO,CAAP,CAAV,CAAqBpS,CAArB,CAAZ,CACD,CAFD,IAEO,CACLsS,UAAYF,OAAO,CAAP,CAAZ,CACD,CAED,MAAO,CACLI,EAAGF,SADE,CAAP,CAGD,CACF,CAnCD,CAqCA,IAAK,cAAgBG,eAAaC,UAAb,CAAhB,CAA0CT,KAA/C,CAAsD,EAAEU,0BAA4B,CAACV,MAAQW,UAAUjW,IAAV,EAAT,EAA2BkW,IAAzD,CAAtD,CAAsHF,0BAA4B,IAAlJ,CAAwJ,CACtJ,SAAWG,OAAX,CAEA,GAAI,CAAC,WAAA,GAAgB,WAAhB,CAA8B,WAA9B,CAA4CC,UAAQC,IAAR,CAA7C,IAAgE,QAApE,CAA8E,YAAYR,CAAZ,CAC/E;CAGD,MAAO7U,GAAP,CAAY,CACZsV,kBAAoB,IAApB,CACAC,eAAiBvV,GAAjB,CACD,CAhDD,OAgDU,CACR,GAAI,CACF,GAAI,CAACgV,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CAED,mBAAA,CAAoB3S,KAApB,CAA2B8S,WAA3B,CAAwC;;AAGtC,GAAI9S,MAAMgC,QAAN,GAAiBhG,MAAjB,CAA0B8W,WAA9B,CAA2C,CACzC,YAAA,CACD;AAED,GAAIC,cAAc/S,KAAd,CAAJ,CAA0B,CACxB,YAAA,CACD,CAED,WAAA,CACD;;;AAKD,gCAAA,CAAiCN,CAAjC,CAAoCmD,SAApC,CAA+C,CAC7C,gBAAkB9G,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,CAAtF,CACA,aAAeA,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,IAAnF,CACA,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBE,SAArB,CAEA,GAAI,CACF,IAAK,cAAgBkW,eAAatP,SAAb,CAAhB,CAA
yC8O,KAA9C,CAAqD,EAAEU,0BAA4B,CAACV,MAAQW,UAAUjW,IAAV,EAAT,EAA2BkW,IAAzD,CAArD,CAAqHF,0BAA4B,IAAjJ,CAAuJ,CACrJ,aAAeV,MAAMzR,KAArB,CAEA,UAAYR,EAAEoH,QAAF,CAAZ;;AAIA,GAAI8K,MAAM5V,MAAN,GAAiB,CAArB,CAAwB,CACtB,UAAY0D,EAAEkS,MAAM,CAAN,CAAF,CAAZ,CAEA,GAAIoB,WAAWhT,KAAX,CAAkB8S,WAAlB,CAAJ,CAAoC,CAClC,YAAc,MAAd,CACA,GAAIG,QAAJ,CAAc,CACZnR,QAAU9B,MAAM0K,IAAN,EAAV,CACD,CAFD,IAEO,CACL5I,QAAU9B,MAAMkT,IAAN,EAAV,CACD,CAED,GAAIpR,OAAJ,CAAa,CACX,cAAA,CACD,CACF,CACF,CACF,CACF,CAAC,MAAOzE,GAAP,CAAY,CACZsV,kBAAoB,IAApB,CACAC,eAAiBvV,GAAjB,CACD,CA5BD,OA4BU,CACR,GAAI,CACF,GAAI,CAACgV,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD;AAGD,kBAAA,CAAmBjI,IAAnB,CAAyBhL,CAAzB,CAA4B;;AAG1B,cAAgBA,EAAE,SAAWgL,IAAX,CAAkB,SAApB,EAA+BA,IAA/B,EAAhB,CACA,mBAAqB,EAAd,CAAmBA,IAAnB,CAA0ByI,SAAjC,CACD,CAED,sBAAA,CAAuBnT,KAAvB,CAA8B,CAC5B,YAAcA,MAAMyD,OAAN,GAAgBoO,OAAhB,EAAd,CACA,kBAAoBpO,QAAQ3C,IAAR,CAAa,SAAU0J,MAAV,CAAkB,CACjD,eAAiBA,OAAOjK,OAAP,CAAe6S,KAAf,CAAuB,GAAvB,CAA6B5I,OAAOjK,OAAP,CAAeoH,EAA7D,CACA,kBAAkBhG,QAAX,CAAoB,SAApB,CAAP,CACD,CAHmB,CAApB,CAKA,uBAAyB1F,SAAzB,CACD;;;AAMD,yBAAA,CAA0B+D,KAA1B,CAAiC,CAC/B,aAAa0K,IAAN,GAAaC,IAAb,GAAoB3O,MAApB,EAA8B,GAArC,CACD,CAED,oBAAA,CAAqB0D,CAArB,CAAwB,CACtB,SAAS2T,cAAF,EAAkBrX,MAAlB,CAA2B,CAAlC,CACD;;AAKD,oBAAsB,wCAAtB;;AAIA,iBAAmB,UAAA,CAAW,aAAX,CAA0B,GAA1B,CAAnB;;;;;;;;;;;;;;;AAoBA,mBAAqB,WAArB,CACA,oBAAsB,WAAtB,CACA,yBAA2B,4BAA3B,CACA,2BAA6B,oBAA7B,CACA,0BAA4B,QAA5B,CACA,WAAa,CAAC,KAAD,CAAQ,KAAR,CAAe,KAAf,CAAsB,KAAtB,CAA6B,KAA7B,CAAoC,KAApC,CAA2C,KAA3C,CAAkD,KAAlD,CAAyD,KAAzD,CAAgE,KAAhE,CAAuE,KAAvE,CAA8E,KAA9E,CAAb,CACA,cAAgBsX,OAAOtW,IAAP,CAAY,GAAZ,CAAhB,CACA,eAAiB,qCAAjB,CACA,eAAiB,wCAAjB,CACA,sBAAwB,UAAA,CAAW,IAAMuW,UAAN,CAAmB,KAAnB,CAA2BC,UAA3B,CAAwC,kBAAxC,CAA6DC,SAA7D,CAAyE,GAApF,CAAyF,IAAzF,CAAxB;;;AAKA,uBAAyB,gBAAzB,CAEA,sBAAwB,UAAA,CAAW,2BAAX,CAAwC,GAAxC,CAAxB;;AAIA,oBAAA,CAAqBzQ,MAArB,CAA6B,CAC3B,cAAckB,OAAP,CAAewP,eAAf,CAAgC,IAAhC,EAAsC/I,IAAtC,EAAP,CACD
,CAED,gBAAA,CAAiBgJ,YAAjB,CAA+B,CAC7BA,aAAeA,aAAahJ,IAAb,EAAf,CACA,GAAIiJ,WAASC,QAAT,CAAkBF,YAAlB,CAAJ,CAAqC,CACnC,mBAAA,CACD,CAED,WAAA,CACD;;AAID,iBAAA,CAAkBpQ,GAAlB,CAAuB7G,IAAvB,CAA6B,CAC3B,MAAQA,KAAKgD,CAAb,CACI4E,QAAU5H,KAAK4H,OADnB;AAIA,GAAIf,IAAIvH,MAAJ,CAAa,IAAb,EAAqBuH,IAAIvH,MAAJ,CAAa,CAAtC,CAAyC,WAAA;AAGzC,GAAIsI,SAAWwP,eAAexP,OAAf,CAAwB,EAAxB,IAAgCwP,eAAevQ,GAAf,CAAoB,EAApB,CAA/C,CAAwE,WAAA,CAExE,YAAc0O,UAAU1O,GAAV,CAAe7D,CAAf,CAAd;;AAIA,GAAIqU,aAAa/V,IAAb,CAAkBgW,OAAlB,CAAJ,CAAgC,WAAA,CAEhC,eAAerJ,IAAR,EAAP,CACD;;;AAMD,wBAAA,CAAyBsJ,UAAzB,CAAqC,CACnC,MAAO,CAACA,WAAW/O,KAAX,CAAiBgP,iBAAjB,GAAuC,EAAxC,EAA4ClX,IAA5C,CAAiD,GAAjD,EAAsDkH,OAAtD,CAA8DiQ,qBAA9D,CAAqF,GAArF,EAA0FjQ,OAA1F,CAAkGkQ,sBAAlG,CAA0H,UAA1H,EAAsIlQ,OAAtI,CAA8ImQ,oBAA9I,CAAoK,IAApK,EAA0K1J,IAA1K,EAAP,CACD;;AAID,2BAAA,CAA4BsJ,UAA5B,CAAwC;AAEtC,GAAIK,eAAetW,IAAf,CAAoBiW,UAApB,GAAmCM,gBAAgBvW,IAAhB,CAAqBiW,UAArB,CAAvC,CAAyE,CACvEA,WAAa3K,SAAS2K,UAAT,CAAqB,EAArB,CAAb,CACD,CAED,SAAWO,SAAO,QAAA,CAASP,UAAT,CAAP,CAAX,CAEA,GAAI,CAACQ,KAAKC,OAAL,EAAL,CAAqB,CACnBT,WAAaU,gBAAgBV,UAAhB,CAAb,CACAQ,KAAOD,SAAO,QAAA,CAASP,UAAT,CAAP,CAAP,CACD,CAED,YAAYS,OAAL,GAAiBD,KAAKG,WAAL,EAAjB,CAAsC,IAA7C,CACD;AAID,yBAAA,CAA0BhL,OAA1B,CAAmClN,IAAnC,CAAyC,CACvC,MAAQA,KAAKgD,CAAb,CACImV,sBAAwBnY,KAAKoY,kBADjC,CAEIA,mBAAqBD,wBAA0B5Y,SAA1B,CAAsC,IAAtC,CAA6C4Y,qBAFtE,CAGIE,WAAarY,KAAKuG,KAHtB,CAIIA,MAAQ8R,aAAe9Y,SAAf,CAA2B,EAA3B,CAAgC8Y,UAJ5C,CAKIC,SAAWtY,KAAKyB,GALpB,CAMIA,IAAM6W,WAAa/Y,SAAb,CAAyB,EAAzB,CAA8B+Y,QANxC,CAOIC,oBAAsBvY,KAAK8G,cAP/B,CAQIA,eAAiByR,sBAAwBhZ,SAAxB,CAAoC,IAApC,CAA2CgZ,mBARhE;;AAYAC,mBAAmBtL,OAAnB,CAA4BlK,CAA5B;;;AAKA,GAAI8D,cAAJ,CAAoB2R,YAAYvL,OAAZ,CAAqBlK,CAArB;;;AAKpB0V,WAAWxL,OAAX,CAAoBlK,CAApB,CAAuBvB,GAAvB;;AAIAkX,cAAczL,OAAd,CAAuBlK,CAAvB;;;AAKA4V,cAAc1L,OAAd,CAAuBlK,CAAvB;AAGA6V,aAAa3L,OAAb,CAAsBlK,CAAtB,CAAyBuD,KAAzB;AAGAuS,kBAAkB5L,OAAlB,CAA2BlK,CAA3B,CAA8BvB,GAA9B;;;;AAMA,GAAIqF,cAAJ,CAAoBuO,aAAanI,OAAb,CAAsBlK,CAAtB,CAAyBoV,kBAAzB;AAGpBW,YAAY7L,OAAZ,CAAqBlK,CAArB;AAGAgW,gBAAgB9L,OAAhB,CA
AyBlK,CAAzB,EAEA,cAAA,CACD,CAED,sBAAA,CAAuBuD,KAAvB,CAA8BvG,IAA9B,CAAoC,CAClC,QAAUA,KAAKyB,GAAf,CACIuB,EAAIhD,KAAKgD,CADb;;AAKA,GAAIiW,mBAAmB3X,IAAnB,CAAwBiF,KAAxB,CAAJ,CAAoC,CAClCA,MAAQ2S,kBAAkB3S,KAAlB,CAAyB9E,GAAzB,CAAR,CACD;;AAID,GAAI8E,MAAMjH,MAAN,CAAe,GAAnB,CAAwB;AAEtB,OAAS0D,EAAE,IAAF,CAAT,CACA,GAAIyD,GAAGnH,MAAH,GAAc,CAAlB,CAAqB,CACnBiH,MAAQE,GAAGuH,IAAH,EAAR,CACD,CACF;AAGD,iBAAiBzH,KAAV,CAAiBvD,CAAjB,EAAoBiL,IAApB,EAAP,CACD,CAED,+BAAA,CAAgCkL,UAAhC,CAA4CnL,IAA5C,CAAkD;;;AAIhD,GAAImL,WAAW7Z,MAAX,EAAqB,CAAzB,CAA4B,CAC1B,SAAW,UAAY;;;AAIrB,eAAiB6Z,WAAWvT,MAAX,CAAkB,SAAUC,GAAV,CAAeuT,SAAf,CAA0B,CAC3DvT,IAAIuT,SAAJ,EAAiBvT,IAAIuT,SAAJ,EAAiBvT,IAAIuT,SAAJ,EAAiB,CAAlC,CAAsC,CAAvD,CACA,UAAA,CACD,CAHgB,CAGd,EAHc,CAAjB,CAKA,0BAA4BxV,mBAAiByV,UAAjB,EAA6BzT,MAA7B,CAAoC,SAAUC,GAAV,CAAeoE,GAAf,CAAoB,CAClF,GAAIpE,IAAI,CAAJ,EAASwT,WAAWpP,GAAX,CAAb,CAA8B,CAC5B,MAAO,CAACA,GAAD,CAAMoP,WAAWpP,GAAX,CAAN,CAAP,CACD,CAED,UAAA,CACD,CAN2B,CAMzB,CAAC,CAAD,CAAI,CAAJ,CANyB,CAA5B,CAOIqP,uBAAyB5Q,iBAAe6Q,qBAAf,CAAsC,CAAtC,CAP7B,CAQIC,QAAUF,uBAAuB,CAAvB,CARd,CASIG,UAAYH,uBAAuB,CAAvB,CAThB;;;;AAiBA,GAAIG,WAAa,CAAb,EAAkBD,QAAQla,MAAR,EAAkB,CAAxC,CAA2C,CACzC6Z,WAAanL,KAAKlG,KAAL,CAAW0R,OAAX,CAAb,CACD,CAED,cAAgB,CAACL,WAAW,CAAX,CAAD,CAAgBA,WAAW7J,KAAX,CAAiB,CAAC,CAAlB,CAAhB,CAAhB,CACA,eAAiBoK,UAAU9T,MAAV,CAAiB,SAAUC,GAAV,CAAehG,GAAf,CAAoB,CACpD,WAAWP,MAAJ,CAAaO,IAAIP,MAAjB,CAA0BuG,GAA1B,CAAgChG,GAAvC,CACD,CAFgB,CAEd,EAFc,CAAjB,CAIA,GAAI8Z,WAAWra,MAAX,CAAoB,EAAxB,CAA4B,CAC1B,MAAO,CACLkW,EAAGmE,UADE,CAAP,CAGD,CAED,MAAO,CACLnE,EAAGxH,IADE,CAAP,CAGD,CA5CU,EAAX,CA8CA,GAAI,CAAC,WAAA,GAAgB,WAAhB,CAA8B,WAA9B,CAA4C+H,UAAQC,IAAR,CAA7C,IAAgE,QAApE,CAA8E,YAAYR,CAAZ,CAC/E,CAED,WAAA,CACD,CAED,6BAAA,CAA8B2D,UAA9B,CAA0C1X,GAA1C,CAA+C;;;;;;AAO7C,eAAiBG,MAAIC,KAAJ,CAAUJ,GAAV,CAAjB,CACIiQ,KAAOpE,WAAWoE,IADtB,CAGA,gBAAkBA,KAAKlK,OAAL,CAAaoS,iBAAb,CAAgC,EAAhC,CAAlB,CAEA,cAAgBT,WAAW,CAAX,EAAc3H,WAAd,GAA4BhK,OAA5B,CAAoC,GAApC,CAAyC,EAAzC,CAAhB,CACA,mBAAqBqS,QAAMC,WAAN,CAAkBC,SAAlB,CAA6BC,WAA7B,CAArB,CAEA,GAAIC,eAAiB
,GAAjB,EAAwBF,UAAUza,MAAV,CAAmB,CAA/C,CAAkD,CAChD,kBAAkBgQ,KAAX,CAAiB,CAAjB,EAAoBhP,IAApB,CAAyB,EAAzB,CAAP,CACD,CAED,YAAc6Y,WAAW7J,KAAX,CAAiB,CAAC,CAAlB,EAAqB,CAArB,EAAwBkC,WAAxB,GAAsChK,OAAtC,CAA8C,GAA9C,CAAmD,EAAnD,CAAd,CACA,iBAAmBqS,QAAMC,WAAN,CAAkBI,OAAlB,CAA2BF,WAA3B,CAAnB,CAEA,GAAIG,aAAe,GAAf,EAAsBD,QAAQ5a,MAAR,EAAkB,CAA5C,CAA+C,CAC7C,kBAAkBgQ,KAAX,CAAiB,CAAjB,CAAoB,CAAC,CAArB,EAAwBhP,IAAxB,CAA6B,EAA7B,CAAP,CACD,CAED,WAAA,CACD;;AAID,0BAAA,CAA2BiG,KAA3B,CAAkC,CAChC,QAAUlH,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAA9E;;AAIA,eAAiBkH,MAAMuB,KAAN,CAAYmR,kBAAZ,CAAjB,CACA,GAAIE,WAAW7Z,MAAX,GAAsB,CAA1B,CAA6B,CAC3B,YAAA,CACD,CAED,aAAe8a,uBAAuBjB,UAAvB,CAAmC5S,KAAnC,CAAf,CACA,GAAI8T,QAAJ,CAAc,eAAA,CAEdA,SAAWC,qBAAqBnB,UAArB,CAAiC1X,GAAjC,CAAX,CACA,GAAI4Y,QAAJ,CAAc,eAAA;;AAId,YAAA,CACD,CAED,aAAe,CACb/T,OAAQiU,WADK,CAEb7S,eAAgB8S,OAFH,CAGb3T,IAAK4T,QAHQ,CAIbjU,eAAgBkU,kBAJH,CAKbtV,QAASuV,gBALI,CAMbpU,MAAOqU,aANM,CAAf;;;;;;;;;;;AAoBA,wBAAA,CAAyB5X,CAAzB,CAA4B6X,IAA5B,CAAkC;;;;AAMhC,GAAIA,KAAKC,uBAAT,CAAkC,CAChC9X,EAAI8X,wBAAwB9X,CAAxB,CAAJ,CACD,CAEDA,EAAI+X,uBAAuB/X,CAAvB,CAAJ,CACAA,EAAIgY,gBAAgBhY,CAAhB,CAAmB6X,KAAK/K,WAAxB,CAAJ,CACA,kBAAoBmL,oBAAoBjY,CAApB,CAApB,CAEA,oBAAA,CACD,CAED,4BAA8B,CAC5BkY,YAAa,CACXJ,wBAAyB,IADd,CAEXhL,YAAa,IAFF,CAGXsI,mBAAoB,IAHT,CADe;;;;;;;;;;;;;;;;;;;AA0B5B+C,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB6a,IAAvB,CAA6B,CACpC,MAAQ7a,KAAKgD,CAAb,CACIwT,KAAOxW,KAAKwW,IADhB,CAEIjQ,MAAQvG,KAAKuG,KAFjB,CAGI9E,IAAMzB,KAAKyB,GAHf,CAKAoZ,KAAO9Y,WAAS,EAAT,CAAa,KAAKmZ,WAAlB,CAA+BL,IAA/B,CAAP,CAEA7X,EAAIA,GAAKkC,UAAQC,IAAR,CAAaqR,IAAb,CAAT;;AAIA,SAAW,KAAK4E,cAAL,CAAoBpY,CAApB,CAAuBuD,KAAvB,CAA8B9E,GAA9B,CAAmCoZ,IAAnC,CAAX,CAEA,GAAIQ,iBAAiBhY,IAAjB,CAAJ,CAA4B,CAC1B,YAAYiY,kBAAL,CAAwBjY,IAAxB,CAA8BL,CAA9B,CAAP,CACD;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBzD,SAArB,CAEA,GAAI,CACF,IAAK,cAAgBkW,eAAa7R,mBAAiBiX,IAAjB,EAAuBvW,MAAvB,CAA8B,SAAUiX,CAAV,CAAa,CAC3E,YAAYA,CAAL,IAAY,IAAnB,CACD,CAFiC,CAAb,CAAhB,CAEAtG,KAFL,CAEY,EAAEU,0BA
A4B,CAACV,MAAQW,UAAUjW,IAAV,EAAT,EAA2BkW,IAAzD,CAFZ,CAE4EF,0BAA4B,IAFxG,CAE8G,CAC5G,QAAUV,MAAMzR,KAAhB,CAEAqX,KAAK5Q,GAAL,EAAY,KAAZ,CACAjH,EAAIkC,UAAQC,IAAR,CAAaqR,IAAb,CAAJ,CAEAnT,KAAO,KAAK+X,cAAL,CAAoBpY,CAApB,CAAuBuD,KAAvB,CAA8B9E,GAA9B,CAAmCoZ,IAAnC,CAAP,CAEA,GAAIQ,iBAAiBhY,IAAjB,CAAJ,CAA4B,CAC1B,MACD,CACF,CACF,CAAC,MAAO1C,GAAP,CAAY,CACZsV,kBAAoB,IAApB,CACAC,eAAiBvV,GAAjB,CACD,CAlBD,OAkBU,CACR,GAAI,CACF,GAAI,CAACgV,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,YAAYqF,kBAAL,CAAwBjY,IAAxB,CAA8BL,CAA9B,CAAP,CACD,CAjF2B;AAqF5BoY,eAAgB,uBAAA,CAAwBpY,CAAxB,CAA2BuD,KAA3B,CAAkC9E,GAAlC,CAAuCoZ,IAAvC,CAA6C,CAC3D,wBAAwBW,gBAAgBxY,CAAhB,CAAmB6X,IAAnB,CAAjB,CAA2C,CAChD7X,EAAGA,CAD6C,CAEhDoV,mBAAoByC,KAAKzC,kBAFuB,CAGhD7R,MAAOA,KAHyC,CAIhD9E,IAAKA,GAJ2C,CAA3C,CAAP,CAMD,CA5F2B;;;AAkG5B6Z,mBAAoB,2BAAA,CAA4BjY,IAA5B,CAAkCL,CAAlC,CAAqC,CACvD,GAAI,CAACK,IAAL,CAAW,CACT,WAAA,CACD,CAED,uBAAuBL,EAAEwT,IAAF,CAAOnT,IAAP,CAAhB,CAAP;;;;CAvG0B,CAA9B;;;;;;AAuHA,2BAA6B,CAAC,iBAAD,CAAoB,UAApB,CAAgC,SAAhC,CAA2C,UAA3C,CAAuD,OAAvD,CAA7B;;AAIA,yBAA2B,CAAC,UAAD,CAA3B;;;;;;;AASA,2BAA6B,CAAC,sBAAD,CAAyB,kBAAzB,CAA6C,kBAA7C,CAAiE,YAAjE,CAA+E,mBAA/E,CAAoG,cAApG,CAA7B,CAEA,yBAA2B,CAAC,YAAD,CAAe,cAAf,CAA+B,cAA/B,CAA+C,aAA/C,CAA8D,aAA9D,CAA6E,aAA7E,CAA4F,aAA5F,CAA2G,eAA3G,CAA4H,eAA5H,CAA6I,iBAA7I,CAAgK,UAAhK,CAA4K,YAA5K,CAA0L,IAA1L,CAAgM,iBAAhM,CAAmN,OAAnN,CAA3B,CAEA,0BAA4B,CAC1B8X,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIvB,IAAMzB,KAAKyB,GADf,CAEIga,UAAYzb,KAAKyb,SAFrB;;AAMA,UAAY,MAAZ,CAEAlV,MAAQmV,mBAAmB1Y,CAAnB,CAAsB2Y,sBAAtB,CAA8CF,SAA9C,CAAR,CACA,GAAIlV,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAE9E,IAAKA,GAAP,CAAYuB,EAAGA,CAAf,CAArB,CAAP;;AAIXuD,MAAQqV,wBAAwB5Y,CAAxB,CAA2B6Y,sBAA3B,CAAR,CACA,GAAItV,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAE9E,IAAKA,GAAP,CAAYuB,EAAGA,CAAf,CAArB,CAAP;AAGXuD,MAAQmV,mBAAmB1Y,CAAnB,CAAsB8Y,oBAAtB,CAA4CL,SAA5C,CAAR,CACA,GAAIlV,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAE9E,IAAKA,GAAP,CAAYuB,EAAGA,CAAf,CAAr
B,CAAP;AAGXuD,MAAQqV,wBAAwB5Y,CAAxB,CAA2B+Y,oBAA3B,CAAR,CACA,GAAIxV,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAE9E,IAAKA,GAAP,CAAYuB,EAAGA,CAAf,CAArB,CAAP;AAGX,MAAO,EAAP,CACD,CA5ByB,CAA5B;;;;;;AAqCA,qBAAuB,CAAC,KAAD,CAAQ,OAAR,CAAiB,WAAjB,CAA8B,eAA9B,CAA+C,YAA/C,CAA6D,WAA7D,CAA0E,SAA1E,CAAvB,CAEA,sBAAwB,GAAxB;;;;;;;AASA,qBAAuB,CAAC,sBAAD,CAAyB,mBAAzB,CAA8C,oBAA9C,CAAoE,mBAApE,CAAyF,oBAAzF,CAA+G,qBAA/G,CAAsI,aAAtI,CAAqJ,iBAArJ,CAAwK,oBAAxK,CAA8L,qBAA9L,CAAqN,eAArN,CAAsO,YAAtO,CAAoP,YAApP,CAAkQ,cAAlQ,CAAkR,cAAlR,CAAkS,yBAAlS,CAA6T,qBAA7T,CAAoV,qBAApV,CAA2W,SAA3W,CAAsX,SAAtX,CAAiY,gBAAjY,CAAmZ,gBAAnZ,CAAqa,SAAra,CAAvB;;AAIA,aAAe,aAAf,CACA,wBAA0B,CAAC,CAAC,SAAD,CAAYgZ,QAAZ,CAAD,CAAwB,CAAC,SAAD,CAAYA,QAAZ,CAAxB,CAA1B,CAEA,2BAA6B,CAC3Bb,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIyY,UAAYzb,KAAKyb,SADrB,CAGA,WAAa,MAAb;;AAIAnV,OAASoV,mBAAmB1Y,CAAnB,CAAsBiZ,gBAAtB,CAAwCR,SAAxC,CAAT,CACA,GAAInV,QAAUA,OAAOhH,MAAP,CAAgB4c,iBAA9B,CAAiD,CAC/C,mBAAmB5V,MAAZ,CAAP,CACD;AAGDA,OAASsV,wBAAwB5Y,CAAxB,CAA2BmZ,gBAA3B,CAA6C,CAA7C,CAAT,CACA,GAAI7V,QAAUA,OAAOhH,MAAP,CAAgB4c,iBAA9B,CAAiD,CAC/C,mBAAmB5V,MAAZ,CAAP,CACD;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqB/G,SAArB,CAEA,GAAI,CACF,IAAK,cAAgBkW,eAAa2G,mBAAb,CAAhB,CAAmDnH,KAAxD,CAA+D,EAAEU,0BAA4B,CAACV,MAAQW,UAAUjW,IAAV,EAAT,EAA2BkW,IAAzD,CAA/D,CAA+HF,0BAA4B,IAA3J,CAAiK,CAC/J,UAAYV,MAAMzR,KAAlB,CAEA,UAAYkF,iBAAe2T,KAAf,CAAsB,CAAtB,CAAZ,CAEA,aAAe1a,MAAM,CAAN,CAAf,CACA,UAAYA,MAAM,CAAN,CAAZ,CAEA,SAAWqB,EAAEoH,QAAF,CAAX,CACA,GAAI/G,KAAK/D,MAAL,GAAgB,CAApB,CAAuB,CACrB,SAAW+D,KAAK2K,IAAL,EAAX,CACA,GAAIsO,MAAMhb,IAAN,CAAW0M,IAAX,CAAJ,CAAsB,CACpB,mBAAmBA,IAAZ,CAAP,CACD,CACF,CACF,CACF,CAAC,MAAOrN,GAAP,CAAY,CACZsV,kBAAoB,IAApB,CACAC,eAAiBvV,GAAjB,CACD,CApBD,OAoBU,CACR,GAAI,CACF,GAAI,CAACgV,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CA3D0B,CAA7B;;;;AAkEA,6BAA+B,CAAC,wBAAD,CAA2B,aAA3B,CAA0C,SAA1C,CAAqD,gBAArD,CAAuE,WAAvE,CAAoF,cAApF,CAAoG,UAApG,CAAgH,UAAhH,
CAA4H,SAA5H,CAAuI,eAAvI,CAAwJ,UAAxJ,CAAoK,cAApK,CAAoL,qBAApL,CAA2M,cAA3M,CAA2N,SAA3N,CAAsO,MAAtO,CAA/B;;;AAKA,6BAA+B,CAAC,4BAAD,CAA+B,oBAA/B,CAAqD,0BAArD,CAAiF,kBAAjF,CAAqG,oBAArG,CAA2H,kBAA3H,CAA+I,iBAA/I,CAAkK,aAAlK,CAAiL,eAAjL,CAAkM,qBAAlM,CAAyN,mBAAzN,CAA8O,cAA9O,CAA8P,aAA9P,CAA6Q,YAA7Q,CAA2R,kBAA3R,CAA+S,WAA/S,CAA4T,UAA5T,CAA/B;;;AAKA,oBAAsB,mDAAtB,CACA,2BAA6B;AAE7B,UAAA,CAAW,4BAAX,CAAyC,GAAzC,CAF6B;;;AAM7B,UAAA,CAAW,6BAAX,CAA0C,GAA1C,CAN6B;AAQ7B,UAAA,CAAW,cAAgBsG,eAAhB,CAAkC,aAA7C,CAA4D,GAA5D,CAR6B,CAA7B,CAUA,kCAAoC,CAClCpB,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIvB,IAAMzB,KAAKyB,GADf,CAEIga,UAAYzb,KAAKyb,SAFrB,CAIA,kBAAoB,MAApB;;;AAIAe,cAAgBd,mBAAmB1Y,CAAnB,CAAsByZ,wBAAtB,CAAgDhB,SAAhD,CAA2D,KAA3D,CAAhB,CACA,GAAIe,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP;;AAInBA,cAAgBZ,wBAAwB5Y,CAAxB,CAA2B0Z,wBAA3B,CAAhB,CACA,GAAIF,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP;AAGnBA,cAAgBG,eAAelb,GAAf,CAAoBmb,sBAApB,CAAhB,CACA,GAAIJ,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP,CAEnB,WAAA,CACD,CAvBiC,CAApC;;;;;;;;;;;;;;AA2CA,wBAA0B;AAExBrB,QAAS,gBAAA,EAAmB,CAC1B,WAAA,CACD,CAJuB,CAA1B;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAsCA,6BAA+B,CAAC,UAAD,CAAa,eAAb,CAA8B,WAA9B,CAA/B,CAEA,6BAA+B,CAAC,qBAAD,CAA/B,CAEA,kCAAoC,CAAC,QAAD,CAAW,YAAX,CAAyB,OAAzB,CAAkC,OAAlC,CAA2C,UAA3C,CAApC,CACA,qCAAuC,UAAA,CAAW0B,8BAA8Bvc,IAA9B,CAAmC,GAAnC,CAAX,CAAoD,GAApD,CAAvC,CAEA,kCAAoC,CAAC,QAAD,CAAW,QAAX,CAAqB,OAArB,CAA8B,UAA9B,CAA0C,UAA1C,CAAsD,MAAtD,CAA8D,IAA9D,CAAoE,YAApE,CAAkF,MAAlF,CAA0F,QAA1F,CAAoG,QAApG,CAA8G,KAA9G,CAAqH,QAArH,CAA+H,SAA/H,CAA0I,QAA1I,CAAoJ,SAApJ,CAA+J,SAA/J,CAA0K,QAA1K,CAAoL,OAApL,CAA6L,UAA7L,CAAyM,SAAzM,CAAoN,OAApN,CAA6N,OAA7N,CAAsO,KAAtO,CAA6O,aAA7O,CAApC,CACA,qCAAuC,UAAA,CAAWwc,8BAA8Bxc,IAA9B,CAAmC,GAAnC,CAAX,CAAoD,GAApD,CAAvC,CAEA,WAAa,gBAAb,CACA,WAAa,kBAAb,CAEA,eAAA,CAAgBgD,KAAhB,CAAuB,CACrB,MAAO,CAACA,MAAMC,IAAN,CAAW,OAAX,GAAuB,EAAxB,EAA8B,GAA9B,EAAqCD,MAAMC,IAAN,CAAW,IAAX,GAAoB,EAAzD,CAAP,CACD;AAGD,sBAAA,CAAuB9B,GAAvB,CAA4B,CAC1BA,IAAMA,IAAIwM,IAAJ,EAAN,CACA,UAAY,CAAZ,CAEA,GAAI8O,iCAAiCzb,IAAjC,CAAsCG,GAAtC
,CAAJ,CAAgD,CAC9CgN,OAAS,EAAT,CACD,CAED,GAAIuO,iCAAiC1b,IAAjC,CAAsCG,GAAtC,CAAJ,CAAgD,CAC9CgN,OAAS,EAAT,CACD;;AAID,GAAIwO,OAAO3b,IAAP,CAAYG,GAAZ,CAAJ,CAAsB,CACpBgN,OAAS,EAAT,CACD,CAED,GAAIyO,OAAO5b,IAAP,CAAYG,GAAZ,CAAJ,CAAsB,CACpBgN,OAAS,EAAT,CACD;AAID,YAAA,CACD;AAGD,kBAAA,CAAmBtG,IAAnB,CAAyB,CACvB,GAAIA,KAAK5E,IAAL,CAAU,KAAV,CAAJ,CAAsB,CACpB,QAAA,CACD,CAED,QAAA,CACD;;AAID,uBAAA,CAAwB4E,IAAxB,CAA8B,CAC5B,UAAY,CAAZ,CACA,eAAiBA,KAAKpB,OAAL,CAAa,QAAb,EAAuBuM,KAAvB,EAAjB,CAEA,GAAI6J,WAAW7d,MAAX,GAAsB,CAA1B,CAA6B,CAC3BmP,OAAS,EAAT,CACD,CAED,YAActG,KAAK2F,MAAL,EAAd,CACA,aAAe,MAAf,CACA,GAAI9G,QAAQ1H,MAAR,GAAmB,CAAvB,CAA0B,CACxB8d,SAAWpW,QAAQ8G,MAAR,EAAX,CACD,CAED,CAAC9G,OAAD,CAAUoW,QAAV,EAAoBtZ,OAApB,CAA4B,SAAUR,KAAV,CAAiB,CAC3C,GAAIqL,iBAAiBrN,IAAjB,CAAsB+b,OAAO/Z,KAAP,CAAtB,CAAJ,CAA0C,CACxCmL,OAAS,EAAT,CACD,CACF,CAJD,EAMA,YAAA,CACD;;AAID,uBAAA,CAAwBtG,IAAxB,CAA8B,CAC5B,UAAY,CAAZ,CACA,aAAeA,KAAKxI,IAAL,EAAf,CACA,YAAcgT,SAAShM,GAAT,CAAa,CAAb,CAAd,CAEA,GAAIiF,SAAWA,QAAQhF,OAAR,GAAoB,YAAnC,CAAiD,CAC/C6H,OAAS,EAAT,CACD,CAED,GAAIE,iBAAiBrN,IAAjB,CAAsB+b,OAAO1K,QAAP,CAAtB,CAAJ,CAA6C,CAC3ClE,OAAS,EAAT,CACD,CAED,YAAA,CACD,CAED,0BAAA,CAA2BtG,IAA3B,CAAiC,CAC/B,UAAY,CAAZ,CAEA,UAAYmV,WAAWnV,KAAK5E,IAAL,CAAU,OAAV,CAAX,CAAZ,CACA,WAAa+Z,WAAWnV,KAAK5E,IAAL,CAAU,QAAV,CAAX,CAAb,CACA,QAAU4E,KAAK5E,IAAL,CAAU,KAAV,CAAV;AAGA,GAAIkE,OAASA,OAAS,EAAtB,CAA0B,CACxBgH,OAAS,EAAT,CACD;AAGD,GAAI5B,QAAUA,QAAU,EAAxB,CAA4B,CAC1B4B,OAAS,EAAT,CACD,CAED,GAAIhH,OAASoF,MAAT,EAAmB,CAACtF,IAAItC,QAAJ,CAAa,QAAb,CAAxB,CAAgD,CAC9C,SAAWwC,MAAQoF,MAAnB,CACA,GAAI0Q,KAAO,IAAX,CAAiB;AAEf9O,OAAS,GAAT,CACD,CAHD,IAGO,CACLA,OAASS,KAAKsO,KAAL,CAAWD,KAAO,IAAlB,CAAT,CACD,CACF,CAED,YAAA,CACD,CAED,wBAAA,CAAyBE,KAAzB,CAAgCxZ,KAAhC,CAAuC,CACrC,aAAa3E,MAAN,CAAe,CAAf,CAAmB2E,KAA1B,CACD;;;;;;;;AAUD,iCAAmC,CACjCkX,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIoC,QAAUpF,KAAKoF,OADnB,CAEIqW,UAAYzb,KAAKyb,SAFrB,CAIA,aAAe,MAAf;;;;AAMA,aAAeC,mBAAmB1Y,CAAnB,CAAsB0a,wBAAtB,CAAgDjC,SAAhD,CAA2D,KAA3D,CAAf,CAEA,GAAIkC,QAAJ,CAAc,CAC
ZC,SAAWpD,QAAQmD,QAAR,CAAX,CAEA,GAAIC,QAAJ,CAAc,eAAA,CACf;;;AAKD,SAAW5a,EAAE,KAAF,CAASoC,OAAT,EAAkB+P,OAAlB,EAAX,CACA,cAAgB,EAAhB,CAEA0I,KAAK/Z,OAAL,CAAa,SAAUH,GAAV,CAAeM,KAAf,CAAsB,CACjC,SAAWjB,EAAEW,GAAF,CAAX,CACA,QAAUwE,KAAK5E,IAAL,CAAU,KAAV,CAAV,CAEA,GAAI,CAACgE,GAAL,CAAU,OAEV,UAAYuW,cAAcvW,GAAd,CAAZ,CACAkH,OAASsP,UAAU5V,IAAV,CAAT,CACAsG,OAASuP,eAAe7V,IAAf,CAAT,CACAsG,OAASwP,eAAe9V,IAAf,CAAT,CACAsG,OAASyP,kBAAkB/V,IAAlB,CAAT,CACAsG,OAAS0P,gBAAgBN,IAAhB,CAAsB5Z,KAAtB,CAAT,CAEAma,UAAU7W,GAAV,EAAiBkH,KAAjB,CACD,CAdD,EAgBA,0BAA4B7K,mBAAiBwa,SAAjB,EAA4BxY,MAA5B,CAAmC,SAAUC,GAAV,CAAeoE,GAAf,CAAoB,CACjF,iBAAiBA,GAAV,EAAiBpE,IAAI,CAAJ,CAAjB,CAA0B,CAACoE,GAAD,CAAMmU,UAAUnU,GAAV,CAAN,CAA1B,CAAkDpE,GAAzD,CACD,CAF2B,CAEzB,CAAC,IAAD,CAAO,CAAP,CAFyB,CAA5B,CAGIyT,uBAAyB5Q,iBAAe6Q,qBAAf,CAAsC,CAAtC,CAH7B,CAII8E,OAAS/E,uBAAuB,CAAvB,CAJb,CAKI7G,SAAW6G,uBAAuB,CAAvB,CALf,CAOA,GAAI7G,SAAW,CAAf,CAAkB,CAChBmL,SAAWpD,QAAQ6D,MAAR,CAAX,CAEA,GAAIT,QAAJ,CAAc,eAAA,CACf;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBre,SAArB,CAEA,GAAI,CACF,IAAK,cAAgBkW,eAAa6I,wBAAb,CAAhB,CAAwDrJ,KAA7D,CAAoE,EAAEU,0BAA4B,CAACV,MAAQW,UAAUjW,IAAV,EAAT,EAA2BkW,IAAzD,CAApE,CAAoIF,0BAA4B,IAAhK,CAAsK,CACpK,aAAeV,MAAMzR,KAArB,CAEA,UAAYR,EAAEoH,QAAF,EAAYkJ,KAAZ,EAAZ,CACA,QAAUhQ,MAAMC,IAAN,CAAW,KAAX,CAAV,CACA,GAAIgE,GAAJ,CAAS,CACPqW,SAAWpD,QAAQjT,GAAR,CAAX,CACA,GAAIqW,QAAJ,CAAc,eAAA,CACf,CAED,SAAWta,MAAMC,IAAN,CAAW,MAAX,CAAX,CACA,GAAIgb,IAAJ,CAAU,CACRX,SAAWpD,QAAQ+D,IAAR,CAAX,CACA,GAAIX,QAAJ,CAAc,eAAA,CACf,CAED,UAAYta,MAAMC,IAAN,CAAW,OAAX,CAAZ,CACA,GAAIC,KAAJ,CAAW,CACToa,SAAWpD,QAAQhX,KAAR,CAAX,CACA,GAAIoa,QAAJ,CAAc,eAAA,CACf,CACF,CACF,CAAC,MAAOjd,GAAP,CAAY,CACZsV,kBAAoB,IAApB,CACAC,eAAiBvV,GAAjB,CACD,CA1BD,OA0BU,CACR,GAAI,CACF,GAAI,CAACgV,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CApGgC,CAAnC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;AAoQA,wBAAA,CAAyBxH,KAAzB,CAAgC+P,UAAhC,CAA4CD,IAA5C,CAAkD;;;;;AAMhD,GAAI9P,MAAQ,CAAZ,CAAe,CACb,eAAiB,cAAYgQ,eAAZ,CAA4B,IAA5B,CAAkCD,UAAlC,CAA8CD,IAA9C,EAAoDG,KAApD,EAAjB;;;;;;AAOA,gBAAkB,IAAMC,UAAxB,CACA,iBAAmB,EAAE,KAAOC,YAAc,GAArB,CAAF,CAAnB,CACA,aAAeC,YAAf,CACD,CAED,QAAA,CACD,CAED,sBAAA,CAAuBnK,QAAvB,CAAiCvD,OAAjC,CAA0C;;;;AAKxC,UAAY,CAAZ,CAEA,GAAIG,YAAYhQ,IAAZ,CAAiBoT,SAASzG,IAAT,EAAjB,CAAJ,CAAuC,CACrC,kBAAoBrB,SAAS8H,QAAT,CAAmB,EAAnB,CAApB;;;AAIA,GAAIoK,cAAgB,CAApB,CAAuB,CACrBrQ,MAAQ,CAAC,EAAT,CACD,CAFD,IAEO,CACLA,MAAQS,KAAKC,GAAL,CAAS,CAAT,CAAY,GAAK2P,aAAjB,CAAR,CACD;;;AAKD,GAAI3N,SAAWA,SAAW2N,aAA1B,CAAyC,CACvCrQ,OAAS,EAAT,CACD,CACF,CAED,YAAA,CACD,CAED,wBAAA,CAAyB0C,OAAzB,CAAkC4N,IAAlC,CAAwC;;;AAItC,GAAI5N,SAAW,CAAC4N,IAAhB,CAAsB,CACpB,SAAA,CACD,CAED,QAAA,CACD,CAED,eAAiB,IAAjB;;AAIA,4BAA8B,CAAC,OAAD,CAAU,SAAV,CAAqB,SAArB,CAAgC,SAAhC,CAA2C,QAA3C,CAAqD,OAArD,CAA8D,OAA9D,CAAuE,OAAvE,CAAgF,KAAhF,CAAuF,OAAvF,CAAgG,MAAhG,CAAwG,QAAxG,CAAkH,KAAlH,CAAyH,iBAAzH,CAA9B,CACA,+BAAiC,UAAA,CAAWC,wBAAwB1e,IAAxB,CAA6B,GAA7B,CAAX,CAA8C,GAA9C,CAAjC;;;AAKA,wBAA0B,UAAA,CAAW,4CAAX,CAAyD,GAAzD,CAA1B;;AAIA,uBAAyB,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAzB;;AAIA,wBAA0B,UAAA,CAAW,yBAAX,CAAsC,GAAtC,CAA1B;AAIA,6BAAA,CAA8Bie,IAA9B,CAAoC;AAElC,GAAIU,2BAA2B3d,IAA3B,CAAgCid,IAAhC,CAAJ,CAA2C,CACzC,MAAO,CAAC,EAAR,CACD,CAED,QAAA,CACD,CAED,kBAAA,CAAmBW,KAAnB,CAA0B,CACxB,MAAO,CAACA,MAAM3b,IAAN,CAAW,OAAX,GAAuB,EAAxB,EAA8B,GAA9B,EAAqC2b,MAAM3b,IAAN,CAAW,IAAX,GAAoB,EAAzD,CAAP,CACD,CAED,yBAAA,CAA0B2b,KAA1B,CAAiC;;;AAI/B,YAAcA,MAAMpR,MAAN,EAAd,CACA,kBAAoB,KAApB,CACA,kBAAoB,KAApB,CACA,UAAY,CAAZ,CAEAqR,YAAYlgB,MAAM,CAAN,CAAS,CAAT,CAAZ,EAAyB6E,OAAzB,CAAiC,UAAY,CAC3C,GAAIkD,QAAQ1H,MAAR,GAAmB,CAAvB,CAA0B,CACxB,OACD,CAED,eAAiB8f,UAAUpY,OAAV,CAAmB,GAAnB,CAAjB;;AAIA,GAAI,CAACqY,aAAD,EAAkBC,QAAQhe,IAAR,CAAaie,UAAb,CAAtB,CAAgD,CAC9CF,cAAgB,IAAhB,CACA5Q,OAAS,EAAT,CACD;;;AAKD,GAAI,CAAC+Q,aAAD,EAAkBC,kBAAkBne,IAAlB,CAAuBie,UAAvB,CAAlB,EAAwDN,2BAA2B3d,IAA3B,CAAgCie,UAAhC,CAA5D,CAAyG,CACvG,GAAI,CAACG,kBAAkB
pe,IAAlB,CAAuBie,UAAvB,CAAL,CAAyC,CACvCC,cAAgB,IAAhB,CACA/Q,OAAS,EAAT,CACD,CACF,CAEDzH,QAAUA,QAAQ8G,MAAR,EAAV,CACD,CAzBD,EA2BA,YAAA,CACD,CAED,sBAAA,CAAuB6R,QAAvB,CAAiC;;AAG/B,GAAIC,oBAAoBte,IAApB,CAAyBqe,QAAzB,CAAJ,CAAwC,CACtC,MAAO,CAAC,GAAR,CACD,CAED,QAAA,CACD,CAED,oBAAA,CAAqBpB,IAArB,CAA2BC,UAA3B,CAAuCqB,OAAvC,CAAgDne,SAAhD,CAA2DgT,QAA3D,CAAqEoL,YAArE,CAAmF;AAEjF,GAAIA,aAAa1b,IAAb,CAAkB,SAAU3C,GAAV,CAAe,CACnC,cAAgBA,GAAhB,CACD,CAFG,IAEGlC,SAFP,CAEkB,CAChB,YAAA,CACD;;AAID,GAAI,CAACgf,IAAD,EAASA,OAASC,UAAlB,EAAgCD,OAASsB,OAA7C,CAAsD,CACpD,YAAA,CACD,CAED,aAAene,UAAUzB,QAAzB,CAEA,eAAiB2B,MAAIC,KAAJ,CAAU0c,IAAV,CAAjB,CACIwB,SAAWzS,WAAWrN,QAD1B;AAMA,GAAI8f,WAAa9f,QAAjB,CAA2B,CACzB,YAAA,CACD;;AAID,aAAese,KAAK/W,OAAL,CAAaqY,OAAb,CAAsB,EAAtB,CAAf,CACA,GAAI,CAACG,WAAW1e,IAAX,CAAgB2e,QAAhB,CAAL,CAAgC,CAC9B,YAAA,CACD;;AAID,GAAIhB,2BAA2B3d,IAA3B,CAAgCoT,QAAhC,CAAJ,CAA+C,CAC7C,YAAA,CACD;AAGD,GAAIA,SAASpV,MAAT,CAAkB,EAAtB,CAA0B,CACxB,YAAA,CACD,CAED,WAAA,CACD,CAED,qBAAA,CAAsBif,IAAtB,CAA4B2B,SAA5B,CAAuC;;;;AAKrC,GAAI,CAACA,UAAU5e,IAAV,CAAeid,IAAf,CAAL,CAA2B,CACzB,MAAO,CAAC,EAAR,CACD,CAED,QAAA,CACD,CAED,0BAAA,CAA2BoB,QAA3B,CAAqC;AAEnC,GAAIQ,oBAAoB7e,IAApB,CAAyBqe,QAAzB,CAAJ,CAAwC,CACtC,SAAA,CACD,CAED,QAAA,CACD,CAED,sBAAA,CAAuBA,QAAvB,CAAiC;AAE/B,GAAIS,mBAAmB9e,IAAnB,CAAwBqe,QAAxB,CAAJ,CAAuC;;;;AAKrC,GAAIQ,oBAAoB7e,IAApB,CAAyBqe,QAAzB,CAAJ,CAAwC,CACtC,MAAO,CAAC,EAAR,CACD,CACF,CAED,QAAA,CACD,CAED,sBAAA,CAAuBE,OAAvB,CAAgC,CAC9B,iBAAO,CAAW,IAAMA,OAAjB,CAA0B,GAA1B,CAAP,CACD,CAED,gBAAA,CAAiBX,KAAjB,CAAwBxK,QAAxB,CAAkC,CAChC,MAAO,CAACA,UAAYwK,MAAMlR,IAAN,EAAb,EAA6B,GAA7B,EAAoCkR,MAAM3b,IAAN,CAAW,OAAX,GAAuB,EAA3D,EAAiE,GAAjE,EAAwE2b,MAAM3b,IAAN,CAAW,IAAX,GAAoB,EAA5F,CAAP,CACD,CAED,mBAAA,CAAoBvD,IAApB,CAA0B,CACxB,UAAYA,KAAKqgB,KAAjB,CACI7B,WAAaxe,KAAKwe,UADtB,CAEIqB,QAAU7f,KAAK6f,OAFnB,CAGIne,UAAY1B,KAAK0B,SAHrB,CAIIsB,EAAIhD,KAAKgD,CAJb,CAKIsd,kBAAoBtgB,KAAK8f,YAL7B,CAMIA,aAAeQ,oBAAsB/gB,SAAtB,CAAkC,EAAlC,CAAuC+gB,iBAN1D,CAQA5e,UAAYA,WAAaE,MAAIC,KAAJ,CAAU2c,UAAV,CAAzB,CACA,cAAgB+B,cAAcV,O
AAd,CAAhB,CACA,SAAWW,YAAYxd,CAAZ,CAAX;;;;;;;AASA,gBAAkBqd,MAAMza,MAAN,CAAa,SAAU6a,aAAV,CAAyBC,IAAzB,CAA+B;;;AAI5D,SAAWC,aAAaD,KAAK7c,OAAL,CAAa0a,IAA1B,CAAX,CACA,UAAYvb,EAAE0d,IAAF,CAAZ,CACA,aAAexB,MAAMlR,IAAN,EAAf,CAEA,GAAI,CAAC4S,YAAYrC,IAAZ,CAAkBC,UAAlB,CAA8BqB,OAA9B,CAAuCne,SAAvC,CAAkDgT,QAAlD,CAA4DoL,YAA5D,CAAL,CAAgF,CAC9E,oBAAA,CACD;AAGD,GAAI,CAACW,cAAclC,IAAd,CAAL,CAA0B,CACxBkC,cAAclC,IAAd,EAAsB,CACpB9P,MAAO,CADa,CAEpBiG,SAAUA,QAFU,CAGpB6J,KAAMA,IAHc,CAAtB,CAKD,CAND,IAMO,CACLkC,cAAclC,IAAd,EAAoB7J,QAApB,CAA+B+L,cAAclC,IAAd,EAAoB7J,QAApB,CAA+B,GAA/B,CAAqCA,QAApE,CACD,CAED,iBAAmB+L,cAAclC,IAAd,CAAnB,CACA,aAAesC,QAAQ3B,KAAR,CAAexK,QAAf,CAAf,CACA,YAAcoM,eAAevC,IAAf,CAAd,CAEA,UAAYwC,aAAaxC,IAAb,CAAmB2B,SAAnB,CAAZ,CACAzR,OAASuS,kBAAkBrB,QAAlB,CAAT,CACAlR,OAASwS,cAActB,QAAd,CAAT,CACAlR,OAASyS,cAAcvB,QAAd,CAAT,CACAlR,OAAS0S,iBAAiBjC,KAAjB,CAAT,CACAzQ,OAAS2S,qBAAqB7C,IAArB,CAAT,CACA9P,OAAS4S,gBAAgBlQ,OAAhB,CAAyB4N,IAAzB,CAAT,CACAtQ,OAAS6S,cAAc5M,QAAd,CAAwBvD,OAAxB,CAAT,CACA1C,OAAS8S,gBAAgB9S,KAAhB,CAAuB+P,UAAvB,CAAmCD,IAAnC,CAAT,CAEAiD,aAAa/S,KAAb,CAAqBA,KAArB,CAEA,oBAAA,CACD,CAxCiB,CAwCf,EAxCe,CAAlB,CA0CA,0BAAwBgT,WAAjB,EAA8BniB,MAA9B,GAAyC,CAAzC,CAA6C,IAA7C,CAAoDmiB,WAA3D,CACD;;AAID,gCAAkC,CAChCtG,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIvB,IAAMzB,KAAKyB,GADf,CAEIC,UAAY1B,KAAK0B,SAFrB,CAGI4e,kBAAoBtgB,KAAK8f,YAH7B,CAIIA,aAAeQ,oBAAsB/gB,SAAtB,CAAkC,EAAlC,CAAuC+gB,iBAJ1D,CAMA5e,UAAYA,WAAaE,MAAIC,KAAJ,CAAUJ,GAAV,CAAzB,CAEA,eAAiBkf,aAAalf,GAAb,CAAjB,CACA,YAAcigB,eAAejgB,GAAf,CAAoBC,SAApB,CAAd,CAEA,UAAYsB,EAAE,SAAF,EAAamS,OAAb,EAAZ,CAEA,gBAAkBwM,WAAW,CAC3BtB,MAAOA,KADoB,CAE3B7B,WAAYA,UAFe,CAG3BqB,QAASA,OAHkB,CAI3Bne,UAAWA,SAJgB,CAK3BsB,EAAGA,CALwB,CAM3B8c,aAAcA,YANa,CAAX,CAAlB;AAUA,GAAI,CAAC8B,WAAL,CAAkB,WAAA;;AAIlB,YAAche,mBAAiBge,WAAjB,EAA8Bhc,MAA9B,CAAqC,SAAUC,GAAV,CAAe6a,IAAf,CAAqB,CACtE,eAAiBkB,YAAYlB,IAAZ,CAAjB,CACA,kBAAkBjS,KAAX,CAAmB5I,IAAI4I,KAAvB,CAA+BoT,UAA/B,CAA4Chc,GAAnD,CACD,CAHa,CAGX,CAAE4I,MAAO,CAAC,GAAV,CAHW,CAAd;;AAOA,GAAIqT,QAAQrT,KAAR
,EAAiB,EAArB,CAAyB,CACvB,eAAe8P,IAAf,CACD,CAED,WAAA,CACD,CAzC+B,CAAlC,CA4CA,6BAA+B,CAAC,QAAD,CAA/B,CAEA,oBAAA,CAAqB9c,GAArB,CAA0B,CACxB,cAAgBG,MAAIC,KAAJ,CAAUJ,GAAV,CAAhB,CACA,aAAeC,UAAUzB,QAAzB,CAEA,eAAA,CACD,CAED,eAAA,CAAgBwB,GAAhB,CAAqB,CACnB,MAAO,CACLA,IAAKA,GADA,CAELqE,OAAQic,YAAYtgB,GAAZ,CAFH,CAAP,CAID,CAED,wBAA0B,CACxB0Z,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIvB,IAAMzB,KAAKyB,GADf,CAEIga,UAAYzb,KAAKyb,SAFrB,CAIA,eAAiBzY,EAAE,qBAAF,CAAjB,CACA,GAAIgf,WAAW1iB,MAAX,GAAsB,CAA1B,CAA6B,CAC3B,SAAW0iB,WAAWze,IAAX,CAAgB,MAAhB,CAAX,CACA,GAAIgb,IAAJ,CAAU,CACR,cAAcA,IAAP,CAAP,CACD,CACF,CAED,YAAc7C,mBAAmB1Y,CAAnB,CAAsBif,wBAAtB,CAAgDxG,SAAhD,CAAd,CACA,GAAIyG,OAAJ,CAAa,CACX,cAAcA,OAAP,CAAP,CACD,CAED,cAAczgB,GAAP,CAAP,CACD,CApBuB,CAA1B,CAuBA,2BAA6B,CAAC,gBAAD,CAAmB,qBAAnB,CAA7B,CAEA,gBAAA,CAAiB2D,OAAjB,CAA0BpC,CAA1B,CAA6B,CAC3B,cAAgB3D,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,GAApF,CAEA+F,QAAUA,QAAQoC,OAAR,CAAgB,UAAhB,CAA4B,GAA5B,EAAiCyG,IAAjC,EAAV,CACA,mBAAiB7I,OAAV,CAAmB+c,SAAnB,CAA8B,CAAEC,QAAS,UAAX,CAA9B,CAAP,CACD,CAED,4BAA8B,CAC5BjH,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,MAAQA,KAAKgD,CAAb,CACIoC,QAAUpF,KAAKoF,OADnB,CAEIqW,UAAYzb,KAAKyb,SAFrB,CAIA,YAAcC,mBAAmB1Y,CAAnB,CAAsBqf,sBAAtB,CAA8C5G,SAA9C,CAAd,CACA,GAAI7T,OAAJ,CAAa,CACX,eAAe2N,UAAU3N,OAAV,CAAmB5E,CAAnB,CAAR,CAAP,CACD;AAED,cAAgB,GAAhB,CACA,iBAAmBoC,QAAQkK,KAAR,CAAc,CAAd,CAAiB6S,UAAY,CAA7B,CAAnB,CACA,eAAenf,EAAEsf,YAAF,EAAgBtU,IAAhB,EAAR,CAAgChL,CAAhC,CAAmCmf,SAAnC,CAAP,CACD,CAd2B,CAA9B,CAiBA,8BAAgC,CAC9BhH,QAAS,gBAAA,CAAiBnb,IAAjB,CAAuB,CAC9B,YAAcA,KAAKoF,OAAnB,CAEA,MAAQF,UAAQC,IAAR,CAAaC,OAAb,CAAR,CAEA,SAAWsO,gBAAgB1Q,EAAE,KAAF,EAASsQ,KAAT,GAAiBtF,IAAjB,EAAhB,CAAX,CACA,YAAYlG,KAAL,CAAW,IAAX,EAAiBxI,MAAxB,CACD,CAR6B,CAAhC,CAWA,qBAAuB;AAErBwG,OAAQ,GAFa,CAGrBS,MAAOgc,sBAAsBpH,OAHR,CAIrB3U,eAAgBgc,8BAA8BrH,OAJzB,CAKrB7U,OAAQmc,uBAAuBtH,OALV,CAMrB/V,QAASsd,wBAAwBvH,OAAxB,CAAgCwH,IAAhC,CAAqCD,uBAArC,CANY,CAOrBhb,eAAgBkb,6BAA6BzH,OAPxB,CAQrBtU,IAAKgc,oBAAoB1H,OA
RJ,CASrBxT,cAAemb,4BAA4B3H,OATtB,CAUrB4H,eAAgBC,oBAAoB7H,OAVf,CAWrBvT,QAASqb,wBAAwB9H,OAXZ,CAYrB+H,WAAYC,0BAA0BhI,OAZjB,CAarBiI,UAAW,kBAAA,CAAmBpjB,IAAnB,CAAyB,CAClC,UAAYA,KAAKuG,KAAjB,CACA,yBAAuB8c,YAAhB,CAA6B9c,KAA7B,CAAP,CACD,CAhBoB,CAkBrB4U,QAAS,gBAAA,CAAiB5a,OAAjB,CAA0B,CACjC,SAAWA,QAAQiW,IAAnB,CAGA,GAAIA,IAAJ,CAAU,CACR,MAAQtR,UAAQC,IAAR,CAAaqR,IAAb,CAAR,CACAjW,QAAQyC,CAAR,CAAYA,CAAZ,CACD,CAED,UAAY,KAAKuD,KAAL,CAAWhG,OAAX,CAAZ,CACA,mBAAqB,KAAKiG,cAAL,CAAoBjG,OAApB,CAArB,CACA,WAAa,KAAK+F,MAAL,CAAY/F,OAAZ,CAAb,CACA,YAAc,KAAK6E,OAAL,CAAarD,WAAS,EAAT,CAAaxB,OAAb,CAAsB,CAAEgG,MAAOA,KAAT,CAAtB,CAAb,CAAd,CACA,mBAAqB,KAAKmB,cAAL,CAAoB3F,WAAS,EAAT,CAAaxB,OAAb,CAAsB,CAAE6E,QAASA,OAAX,CAAtB,CAApB,CAArB,CACA,QAAU,KAAKyB,GAAL,CAAS9E,WAAS,EAAT,CAAaxB,OAAb,CAAsB,CAAE6E,QAASA,OAAX,CAAtB,CAAT,CAAV,CACA,kBAAoB,KAAKuC,aAAL,CAAmBpH,OAAnB,CAApB,CACA,YAAc,KAAKqH,OAAL,CAAa7F,WAAS,EAAT,CAAaxB,OAAb,CAAsB,CAAE6E,QAASA,OAAX,CAAtB,CAAb,CAAd,CACA,eAAiB,KAAK8d,UAAL,CAAgBnhB,WAAS,EAAT,CAAaxB,OAAb,CAAsB,CAAE6E,QAASA,OAAX,CAAtB,CAAhB,CAAjB,CACA,cAAgB,KAAKge,SAAL,CAAe,CAAE7c,MAAOA,KAAT,CAAf,CAAhB,CAEA,oBAAsB,KAAKwc,cAAL,CAAoBxiB,OAApB,CAAtB,CACIkB,IAAM6hB,gBAAgB7hB,GAD1B,CAEIqE,OAASwd,gBAAgBxd,MAF7B,CAIA,MAAO,CACLS,MAAOA,KADF,CAELD,OAAQA,MAFH,CAGLE,eAAgBA,gBAAkB,IAH7B,CAILK,IAAKA,GAJA,CAKLa,eAAgBA,cALX,CAMLtC,QAASA,OANJ,CAOLuC,cAAeA,aAPV,CAQLlG,IAAKA,GARA,CASLqE,OAAQA,MATH,CAUL8B,QAASA,OAVJ,CAWLsb,WAAYA,UAXP,CAYLE,UAAWA,SAZN,CAAP,CAcD,CAxDoB,CAAvB,CA2DA,qBAAA,CAAsB3hB,GAAtB,CAA2BC,SAA3B,CAAsC,CACpCA,UAAYA,WAAaE,MAAIC,KAAJ,CAAUJ,GAAV,CAAzB,CACA,eAAiBC,SAAjB,CACIzB,SAAWsjB,WAAWtjB,QAD1B,CAGA,eAAiBA,SAAS6H,KAAT,CAAe,GAAf,EAAoBwH,KAApB,CAA0B,CAAC,CAA3B,EAA8BhP,IAA9B,CAAmC,GAAnC,CAAjB,CAEA,kBAAkBL,QAAX,GAAwBujB,WAAWC,UAAX,CAAxB,EAAkDC,gBAAzD,CACD;AAGD,yBAAA,CAA0BlP,QAA1B,CAAoCxR,CAApC,CAAuChD,IAAvC,CAA6C,CAC3C,UAAYA,KAAKyF,KAAjB,CAEA,GAAI,CAACA,KAAL,CAAY,eAAA,CAEZzC,EAAEyC,MAAMnF,IAAN,CAAW,GAAX,CAAF,CAAmBkU,QAAnB,EAA6BhQ,MAA7B,GAEA,eAAA,CACD;AAGD,0BAAA,CAA2BgQ,QAA3B,CAAqCxR,CAArC,CAAwCyN,KAAxC,CAA+C
,CAC7C,eAAiBA,MAAMrK,UAAvB,CAEA,GAAI,CAACA,UAAL,CAAiB,eAAA,CAEjBxC,mBAAiBwC,UAAjB,EAA6BtC,OAA7B,CAAqC,SAAUmG,GAAV,CAAe,CAClD,aAAejH,EAAEiH,GAAF,CAAOuK,QAAP,CAAf,CACA,UAAYpO,WAAW6D,GAAX,CAAZ;AAGA,GAAI,YAAA,GAAiB,QAArB,CAA+B,CAC7B0Z,SAASxgB,IAAT,CAAc,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CACnC+I,cAAcpJ,EAAEK,IAAF,CAAd,CAAuBL,CAAvB,CAA0BoD,WAAW6D,GAAX,CAA1B,EACD,CAFD,EAGD,CAJD,QAIW,YAAA,GAAiB,UAArB,CAAiC;AAEtC0Z,SAASxgB,IAAT,CAAc,SAAUc,KAAV,CAAiBZ,IAAjB,CAAuB,CACnC,WAAaG,MAAMR,EAAEK,IAAF,CAAN,CAAeL,CAAf,CAAb;AAEA,GAAI,aAAA,GAAkB,QAAtB,CAAgC,CAC9BoJ,cAAcpJ,EAAEK,IAAF,CAAd,CAAuBL,CAAvB,CAA0B8B,MAA1B,EACD,CACF,CAND,EAOD,CACF,CAnBD,EAqBA,eAAA,CACD,CAED,6BAAA,CAA8B9B,CAA9B,CAAiCmD,SAAjC,CAA4C,CAC1C,iBAAiB/B,IAAV,CAAe,SAAUgG,QAAV,CAAoB,CACxC,GAAIwZ,MAAMC,OAAN,CAAczZ,QAAd,CAAJ,CAA6B,CAC3B,cAAgB1B,iBAAe0B,QAAf,CAAyB,CAAzB,CAAhB,CACI9C,EAAIwc,UAAU,CAAV,CADR,CAEIvgB,KAAOugB,UAAU,CAAV,CAFX,CAIA,SAASxc,CAAF,EAAKhI,MAAL,GAAgB,CAAhB,EAAqB0D,EAAEsE,CAAF,EAAK/D,IAAL,CAAUA,IAAV,CAArB,EAAwCP,EAAEsE,CAAF,EAAK/D,IAAL,CAAUA,IAAV,EAAgB0K,IAAhB,KAA2B,EAA1E,CACD,CAED,SAAS7D,QAAF,EAAY9K,MAAZ,GAAuB,CAAvB,EAA4B0D,EAAEoH,QAAF,EAAY4D,IAAZ,GAAmBC,IAAnB,KAA8B,EAAjE,CACD,CAVM,CAAP,CAWD,CAED,eAAA,CAAgB4M,IAAhB,CAAsB,CACpB,MAAQA,KAAK7X,CAAb,CACIkB,KAAO2W,KAAK3W,IADhB,CAEI6f,eAAiBlJ,KAAKkJ,cAF1B,CAGIC,kBAAoBnJ,KAAKoJ,WAH7B,CAIIA,YAAcD,oBAAsBzkB,SAAtB,CAAkC,KAAlC,CAA0CykB,iBAJ5D;AAOA,GAAI,CAACD,cAAL,CAAqB,WAAA;;AAIrB,GAAI,qBAAA,GAA0B,QAA9B,CAAwC,qBAAA,CAExC,cAAgBA,eAAe5d,SAA/B,CACI+d,sBAAwBH,eAAejd,cAD3C,CAEIA,eAAiBod,wBAA0B3kB,SAA1B,CAAsC,IAAtC,CAA6C2kB,qBAFlE,CAKA,qBAAuBC,qBAAqBnhB,CAArB,CAAwBmD,SAAxB,CAAvB,CAEA,GAAI,CAACie,gBAAL,CAAuB,WAAA;;;;;AAQvB,GAAIH,WAAJ,CAAiB,CACf,aAAejhB,EAAEohB,gBAAF,CAAf;AAGA5P,SAAShV,IAAT,CAAcwD,EAAE,aAAF,CAAd,EACAwR,SAAWA,SAAS1G,MAAT,EAAX,CAEA0G,SAAW6P,kBAAkB7P,QAAlB,CAA4BxR,CAA5B,CAA+B+gB,cAA/B,CAAX,CACAvP,SAAW8P,iBAAiB9P,QAAjB,CAA2BxR,CAA3B,CAA8B+gB,cAA9B,CAAX,CAEAvP,SAAW+P,SAASrgB,IAAT,EAAesQ,QAAf,CAAyBzS,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE/T,eAAgBA,cAAlB,CAAnB,CAAzB,CAAX,CAEA,SAA
S0P,IAAF,CAAOhC,QAAP,CAAP,CACD,CAED,WAAa,MAAb;;AAIA,GAAIoP,MAAMC,OAAN,CAAcO,gBAAd,CAAJ,CAAqC,CACnC,sBAAwB1b,iBAAe0b,gBAAf,CAAiC,CAAjC,CAAxB,CACIha,SAAWoa,kBAAkB,CAAlB,CADf,CAEIjhB,KAAOihB,kBAAkB,CAAlB,CAFX,CAIA1f,OAAS9B,EAAEoH,QAAF,EAAY7G,IAAZ,CAAiBA,IAAjB,EAAuB0K,IAAvB,EAAT,CACD,CAND,IAMO,CACLnJ,OAAS9B,EAAEohB,gBAAF,EAAoBpW,IAApB,GAA2BC,IAA3B,EAAT,CACD;;AAID,GAAInH,cAAJ,CAAoB,CAClB,gBAAgB5C,IAAT,EAAeY,MAAf,CAAuB+V,IAAvB,CAAP,CACD,CAED,aAAA,CACD,CAED,sBAAA,CAAuBA,IAAvB,CAA6B,CAC3B,SAAWA,KAAK3W,IAAhB,CACIwB,UAAYmV,KAAKnV,SADrB,CAEI+e,eAAiB5J,KAAK6J,QAF1B,CAGIA,SAAWD,iBAAmBllB,SAAnB,CAA+B,IAA/B,CAAsCklB,cAHrD,CAMA,WAAaE,OAAO5iB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAEkJ,eAAgBre,UAAUxB,IAAV,CAAlB,CAAnB,CAAP,CAAb;AAGA,GAAIY,MAAJ,CAAY,CACV,aAAA,CACD;;AAID,GAAI4f,QAAJ,CAAc,wBAAwBxgB,IAAjB,EAAuB2W,IAAvB,CAAP,CAEd,WAAA,CACD,CAED,kBAAoB,CAClBM,QAAS,gBAAA,EAAmB,CAC1B,cAAgB9b,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoEqkB,gBAApF,CACA,SAAWrkB,UAAU,CAAV,CAAX,CACA,UAAYwb,IAAZ,CACI+J,YAAcC,MAAMD,WADxB,CAEIE,eAAiBD,MAAMC,cAF3B;AAKA,GAAIpf,UAAUI,MAAV,GAAqB,GAAzB,CAA8B,iBAAiBqV,OAAV,CAAkBN,IAAlB,CAAP,CAE9BA,KAAO9Y,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CACxBnV,UAAWA,SADa,CAAnB,CAAP,CAIA,GAAIkf,WAAJ,CAAiB,CACf,aAAeG,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,SAAR,CAAmB+f,YAAa,IAAhC,CAAsC1d,MAAOue,cAA7C,CAAnB,CAAd,CAAf,CAEA,MAAO,CACL1f,QAAS4f,QADJ,CAAP,CAGD,CACD,UAAYD,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,OAAR,CAAnB,CAAd,CAAZ,CACA,mBAAqB6gB,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,gBAAR,CAAnB,CAAd,CAArB,CACA,WAAa6gB,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,QAAR,CAAnB,CAAd,CAAb,CACA,kBAAoB6gB,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,eAAR,CAAnB,CAAd,CAApB,CACA,YAAc6gB,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,SAAR,CAAmB+f,YAAa,IAAhC,CAAsC1d,MAAOA,KAA7C,CAAnB,CAAd,CAAd,CAEA,mBAAqBwe,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,gBAAR,CAA0BkB,QAASA,OAAnC,CAAnB,CAAd,CAArB,CACA,YAAc2f,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KA
AM,SAAR,CAAmBkB,QAASA,OAA5B,CAAnB,CAAd,CAAd,CACA,QAAU2f,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,KAAR,CAAekB,QAASA,OAAxB,CAAiCwC,QAASA,OAA1C,CAAnB,CAAd,CAAV,CACA,eAAiBmd,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,YAAR,CAAsBkB,QAASA,OAA/B,CAAnB,CAAd,CAAjB,CACA,cAAgB2f,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,WAAR,CAAqBqC,MAAOA,KAA5B,CAAnB,CAAd,CAAhB,CAEA,UAAYwe,cAAchjB,WAAS,EAAT,CAAa8Y,IAAb,CAAmB,CAAE3W,KAAM,gBAAR,CAAnB,CAAd,GAAiE,CAAEzC,IAAK,IAAP,CAAaqE,OAAQ,IAArB,CAA7E,CACIrE,IAAME,MAAMF,GADhB,CAEIqE,OAASnE,MAAMmE,MAFnB,CAIA,MAAO,CACLS,MAAOA,KADF,CAELnB,QAASA,OAFJ,CAGLkB,OAAQA,MAHH,CAILE,eAAgBA,cAJX,CAKLkB,eAAgBA,cALX,CAMLb,IAAKA,GANA,CAOLc,cAAeA,aAPV,CAQLlG,IAAKA,GARA,CASLqE,OAAQA,MATH,CAUL8B,QAASA,OAVJ,CAWLsb,WAAYA,UAXP,CAYLE,UAAWA,SAZN,CAAP,CAcD,CApDiB,CAApB,CAuDA,oBAAuB,UAAY,CACjC,SAAW5hB,kBAAkBrC,oBAAoBC,IAApB,CAAyB,gBAAA,CAAiBqR,KAAjB,CAAwB,CAC5E,kBAAoBA,MAAM9I,aAA1B,CACI6O,KAAO/F,MAAM+F,IADjB,CAEIxT,EAAIyN,MAAMzN,CAFd,CAGIyY,UAAYhL,MAAMgL,SAHtB,CAII3W,OAAS2L,MAAM3L,MAJnB,CAKImgB,UAAYxU,MAAMwU,SALtB,CAMI1e,MAAQkK,MAAMlK,KANlB,CAOI9E,IAAMgP,MAAMhP,GAPhB,CAQA,SAAA,CAAWqe,YAAX,CAAyBoF,aAAzB,CAAwCC,cAAxC,CAAwDjC,UAAxD,CACA,2BAA2B1jB,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAASC,IAAT,CAAgBD,SAASE,IAAjC,EACE,MAAA;AAEEylB,MAAQ,CAAR,CACAtF,aAAe,CAACa,aAAalf,GAAb,CAAD,CAAf;;AAKF,MAAA,CACE,GAAI,EAAEkG,eAAiByd,MAAQ,EAA3B,CAAJ,CAAoC,CAClC3lB,SAASE,IAAT,CAAgB,EAAhB,CACA,MACD,CAEDylB,OAAS,CAAT,CACA3lB,SAASE,IAAT,CAAgB,CAAhB,CACA,gBAAgBgF,MAAT,CAAgBgD,aAAhB,CAAP,CAEF,MAAA,CACE3E,EAAIvD,SAAS8C,IAAb,CAEAiU,KAAOxT,EAAEwT,IAAF,EAAP,CAEA0O,cAAgB,CACdzjB,IAAKkG,aADS,CAEd6O,KAAMA,IAFQ,CAGdxT,EAAGA,CAHW,CAIdyY,UAAWA,SAJG,CAKdmJ,YAAa,IALC,CAMdE,eAAgBve,KANF,CAOduZ,aAAcA,YAPA,CAAhB,CASAqF,eAAiBE,cAAclK,OAAd,CAAsB8J,SAAtB,CAAiCC,aAAjC,CAAjB,CAGApF,aAAazN,IAAb,CAAkB1K,aAAlB,EACA7C,OAAS/C,WAAS,EAAT,CAAa+C,MAAb,CAAqB,CAC5BM,QAAS,aAAeN,OAAOM,OAAtB,CAAgC,mCAAhC,CAAsEggB,KAAtE,CAA8E,iBAA9E,CAAkGD,eAAe/f,OAAjH,CAA2H,YADxG,CAArB,CAAT,CAIAuC,cAAg
Bwd,eAAexd,aAA/B,CACAlI,SAASE,IAAT,CAAgB,CAAhB,CACA,MAEF,OAAA,CACEujB,WAAaQ,iBAAiBR,UAAjB,CAA4B,CAAE9d,QAAS,QAAUN,OAAOM,OAAjB,CAA2B,QAAtC,CAA5B,CAAb,CACA,gBAAgB3C,MAAT,CAAgB,QAAhB,CAA0BV,WAAS,EAAT,CAAa+C,MAAb,CAAqB,CACpDwgB,YAAaF,KADuC,CAEpDG,eAAgBH,KAFoC,CAGpDlC,WAAYA,UAHwC,CAArB,CAA1B,CAAP,CAMF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgBpjB,IAAT,EAAP,CAvDJ,CAyDD,CACF,CA5DM,CA4DJ8C,OA5DI,CA4DK,IA5DL,CAAP,CA6DD,CAvE4B,CAAlB,CAAX,CAyEA,wBAAA,CAAyB4iB,EAAzB,CAA6B,CAC3B,YAAYziB,KAAL,CAAW,IAAX,CAAiB1D,SAAjB,CAAP,CACD,CAED,sBAAA,CACD,CA/EqB,EAAtB,CAiFA,YAAc,CACZwC,MAAO,cAAA,CAAeJ,GAAf,CAAoB+U,IAApB,CAA0B,CAC/B,UAAY,IAAZ,CAEA,SAAWnX,UAAUC,MAAV,CAAmB,CAAnB,EAAwBD,UAAU,CAAV,IAAiBE,SAAzC,CAAqDF,UAAU,CAAV,CAArD,CAAoE,EAA/E,CACA,yBAAyBF,oBAAoBC,IAApB,CAAyB,gBAAA,EAAmB,CACnE,uBAAA,CAAyBqmB,aAAzB,CAAwChB,cAAxC,CAAwDC,QAAxD,CAAkEhjB,SAAlE,CAA6EujB,SAA7E,CAAwFjiB,CAAxF,CAA2FyY,SAA3F,CAAsG3W,MAAtG,CAA8G4gB,OAA9G,CAAuHnf,KAAvH,CAA8HoB,aAA9H,CAEA,2BAA2BnI,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAASC,IAAT,CAAgBD,SAASE,IAAjC,EACE,MAAA,CACEgmB,oBAAsB9K,KAAK4K,aAA3B,CAA0CA,cAAgBE,sBAAwBpmB,SAAxB,CAAoC,IAApC,CAA2ComB,mBAArG,CAA0HlB,eAAiB5J,KAAK6J,QAAhJ,CAA0JA,SAAWD,iBAAmBllB,SAAnB,CAA+B,IAA/B,CAAsCklB,cAA3M,CACA/iB,UAAYE,MAAIC,KAAJ,CAAUJ,GAAV,CAAZ,CAEA,GAAImkB,YAAYlkB,SAAZ,CAAJ,CAA4B,CAC1BjC,SAASE,IAAT,CAAgB,CAAhB,CACA,MACD,CAED,gBAAgB8C,MAAT,CAAgB,QAAhB,CAA0BE,OAAOzC,MAAjC,CAAP,CAEF,MAAA,CACE+kB,UAAYY,aAAapkB,GAAb,CAAkBC,SAAlB,CAAZ;AAGAjC,SAASE,IAAT,CAAgB,CAAhB,CACA,gBAAgBgF,MAAT,CAAgBlD,GAAhB,CAAqB+U,IAArB,CAA2B9U,SAA3B,CAAP,CAEF,MAAA,CACEsB,EAAIvD,SAAS8C,IAAb,CAEA,GAAI,CAACS,EAAE7C,KAAP,CAAc,CACZV,SAASE,IAAT,CAAgB,EAAhB,CACA,MACD,CAED,gBAAgB8C,MAAT,CAAgB,QAAhB,CAA0BO,CAA1B,CAAP,CAEF,OAAA,CAEEwT,KAAOxT,EAAEwT,IAAF,EAAP;;AAIAiF,UAAYzY,EAAE,MAAF,EAAU9D,GAAV,CAAc,SAAUkE,CAAV,CAAaC,IAAb,CAAmB,CAC3C,SAASA,IAAF,EAAQE,IAAR,CAAa,MAAb,CAAP,CACD,CAFW,EAET4R,OAFS,EAAZ,CAGArQ,OAASugB,cAAclK,OAAd,CAAsB8J,SAAtB,CAAiC,CAAExjB,IAAKA,GAAP,CAAY+U,KAAMA,IAAlB,CAAwBxT,EAAGA,CAA3B,CAA8ByY,
UAAWA,SAAzC,CAAoD/Z,UAAWA,SAA/D,CAA0EgjB,SAAUA,QAApF,CAAjC,CAAT,CACAgB,QAAU5gB,MAAV,CAAkByB,MAAQmf,QAAQnf,KAAlC,CAAyCoB,cAAgB+d,QAAQ/d,aAAjE;AAIA,GAAI,EAAE8d,eAAiB9d,aAAnB,CAAJ,CAAuC,CACrClI,SAASE,IAAT,CAAgB,EAAhB,CACA,MACD,CAEDF,SAASE,IAAT,CAAgB,EAAhB,CACA,uBAAuB,CACrBslB,UAAWA,SADU,CAErBtd,cAAeA,aAFM,CAGrB6O,KAAMA,IAHe,CAIrBxT,EAAGA,CAJkB,CAKrByY,UAAWA,SALU,CAMrB3W,OAAQA,MANa,CAOrByB,MAAOA,KAPc,CAQrB9E,IAAKA,GARgB,CAAhB,CAAP,CAWF,OAAA,CACEqD,OAASrF,SAAS8C,IAAlB,CACA9C,SAASE,IAAT,CAAgB,EAAhB,CACA,MAEF,OAAA,CACEmF,OAAS/C,WAAS,EAAT,CAAa+C,MAAb,CAAqB,CAC5BwgB,YAAa,CADe,CAE5BQ,eAAgB,CAFY,CAArB,CAAT,CAKF,OAAA,CACE,gBAAgBrjB,MAAT,CAAgB,QAAhB,CAA0BqC,MAA1B,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgBhF,IAAT,EAAP,CA5EJ,CA8ED,CACF,CAjFM,CAiFJ8C,OAjFI,CAiFKmC,KAjFL,CAAP,CAkFD,CArFwB,CAAlB,GAAP,CAsFD,CA3FW;;AAgGZghB,cAAe,sBAAA,CAAuBtkB,GAAvB,CAA4B,CACzC,WAAa,IAAb,CAEA,yBAAyBtC,oBAAoBC,IAApB,CAAyB,iBAAA,EAAoB,CACpE,2BAA2BI,IAApB,CAAyB,kBAAA,CAAmBwmB,SAAnB,CAA8B,CAC5D,MAAO,CAAP,CAAU,CACR,OAAQA,UAAUtmB,IAAV,CAAiBsmB,UAAUrmB,IAAnC,EACE,MAAA,CACEqmB,UAAUrmB,IAAV,CAAiB,CAAjB,CACA,gBAAgBgF,MAAT,CAAgBlD,GAAhB,CAAP,CAEF,MAAA,CACE,iBAAiBgB,MAAV,CAAiB,QAAjB,CAA2BujB,UAAUzjB,IAArC,CAAP,CAEF,MAAA,CACA,IAAK,KAAL,CACE,iBAAiBzC,IAAV,EAAP,CAVJ,CAYD,CACF,CAfM,CAeJmmB,QAfI,CAeMC,MAfN,CAAP,CAgBD,CAjBwB,CAAlB,GAAP,CAkBD,CArHW,CAAd,CAwHA,YAAiBC,OAAjB;;ACzgJA;AACA,AAAO,IAAMrZ,cAAY,IAAIsZ,MAAJ,CAAW,gCAAX,EAA6C,GAA7C,CAAlB;;;;AAIP,AAAO,IAAM5Y,eAAa,qBAAnB;;AAEP,AAAO,IAAMJ,mBAAiB,CAC5B,wCAD4B,EAE5B,uCAF4B,EAG5B,qCAH4B,EAI5B,oCAJ4B,CAAvB;;;AAQP,AAAO,IAAMK,sBAAoB,CAC/B,OAD+B,EAE/B,QAF+B,EAG/B,UAH+B,EAI/B,MAJ+B,EAK/B,OAL+B,EAM/B,IAN+B,EAO/B,OAP+B,EAQ/B,QAR+B,EAS/B,QAT+B,CAA1B;;;AAaP,AAAO,IAAMtD,iBAAe,CAAC,OAAD,EAAU,OAAV,CAArB;AACP,AAAO,IAAMkc,0BAAwBlc,eAAajL,GAAb,CAAiB;eAAgBkL,QAAhB;CAAjB,CAA9B;AACP,AAAO,IAAMkc,qBAAmBnc,eAAa7J,IAAb,CAAkB,GAAlB,CAAzB;AACP,AAAO,IAAM+J,oBAAkB,CAAC,KAAD,EAAQ,QAAR,EAAkB,MAAlB,EAA0B,OAA1B,EAAmC,IAAnC,EAAyC,KAAzC,CAAxB;AACP,AAAO,IAAMuD,uBAAqB,IAAIwY,MAAJ,QAAgB/b,kBAAgB/J,IAAhB,CAAq
B,GAArB,CAAhB,SAA+C,GAA/C,CAA3B;;;AAGP,AAAO,IAAMgK,sBAAoB,CAAC,GAAD,CAA1B;AACP,AAAO,IAAMic,2BAAyBjc,oBAAkBpL,GAAlB,CAAsB;SAAUqL,GAAV;CAAtB,EAA6CjK,IAA7C,CAAkD,GAAlD,CAA/B;;;AAGP,AAAO,IAAM2T,6BAA2B,CAAC,IAAD,EAAO,IAAP,EAAa,OAAb,EAAsB,KAAtB,EAA6B,QAA7B,EAAuC,MAAvC,EAA+C3T,IAA/C,CAAoD,GAApD,CAAjC;;;AAGP,IAAMkK,gBAAc,CAAC,IAAD,EAAO,IAAP,EAAa,IAAb,EAAmB,IAAnB,EAAyB,IAAzB,CAApB;AACA,AAAO,IAAM2J,oBAAkB3J,cAAYlK,IAAZ,CAAiB,GAAjB,CAAxB;;;;;;;;AAQP,AAAO,IAAMsK,oCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C;;AAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,UAvB2C,EAwB3C,OAxB2C,EAyB3C,YAzB2C,EA0B3C,UA1B2C;AA2B3C,2BA3B2C;AA4B3C,OA5B2C,EA6B3C,eA7B2C,EA8B3C,SA9B2C,EA+B3C,QA/B2C,EAgC3C,QAhC2C,EAiC3C,KAjC2C,EAkC3C,OAlC2C,EAmC3C,UAnC2C,EAoC3C,SApC2C,EAqC3C,UArC2C,EAsC3C,SAtC2C,EAuC3C,SAvC2C,EAwC3C,OAxC2C,CAAtC;;;;;;;;;;;;;AAsDP,AAAO,IAAME,oCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMoB,0BAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjC5L,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO;;AAeP,AAAO;;;;;AAMP,AAAO;;AASP,AAAO;AAMP,AAAO;;;;;;AAMP,AAAO,AAAMmK;;;AAuBb,AAAO,AAAMiV,AAA+BjV;;;AAG5C,AAAO;;;;;;AAMP,AAAO,AAAMC;;AA0Db,AAAO,AAAM+U,AAA+B/U;;;AAG5C,AAAO,IAAMiM,mBAAiB,wCAAvB;;;AAGP,AAAO;;;;AAIP,AAAO;AAgBP,AAAO;;;AAGP,AAAO,AAAM2I;;;;;;AAMb,AAAO;;;;AAIP,AAAO;;;;AAIP,AAAO;;;AAGP,AAAO;;;AAGP,AAAO;;;;AAIP,AAAO,IAAM3U,qBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,E
AuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMkB,wBAAsB,IAAIua,MAAJ,QAAgBzb,mBAAiBrK,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAMP,IAAMuK,0BAAsBD,kCAA8BtK,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAM+K,yBAAuB,IAAI+a,MAAJ,CAAWvb,uBAAX,EAAgC,GAAhC,CAA7B;;AAEP,IAAME,0BAAsBD,kCAA8BxK,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAM6K,yBAAuB,IAAIib,MAAJ,CAAWrb,uBAAX,EAAgC,GAAhC,CAA7B,CAEP,AAAO,AAAoCA,AAAhB,AAAyCF,AAAzC,AAE3B,AAAO,AACP,AAAO,AACP,AAAO,AAEP,AAAO;;ACrYP;;;;;;;;;AASA,AAAe,SAAS2b,UAAT,CAAiBxjB,CAAjB,EAAoB;MAC7BwI,aAAa,KAAjB;IACE,IAAF,EAAQrI,IAAR,CAAa,UAACc,KAAD,EAAQqH,OAAR,EAAoB;QACzBC,cAAcvI,EAAEsI,OAAF,EAAW3L,IAAX,GAAkBgH,GAAlB,CAAsB,CAAtB,CAApB;;QAEI4E,eAAeA,YAAY3E,OAAZ,KAAwB,IAA3C,EAAiD;mBAClC,IAAb;QACE0E,OAAF,EAAW9G,MAAX;KAFF,MAGO,IAAIgH,UAAJ,EAAgB;mBACR,KAAb;;qBAEaF,OAAb,EAAsBtI,CAAtB,EAAyB,IAAzB;;GATJ;;SAaOA,CAAP;;;ACxBF;;;;;;;;;;;AAWA,AAAe,SAASyI,cAAT,CAAsBpI,IAAtB,EAA4BL,CAA5B,EAA2C;MAAZ0I,EAAY,uEAAP,KAAO;;MAClDpI,QAAQN,EAAEK,IAAF,CAAd;;MAEIqI,EAAJ,EAAQ;QACFE,UAAUvI,KAAKsI,WAAnB;QACMI,IAAI/I,EAAE,SAAF,CAAV;;;;WAIO4I,WAAW,EAAEA,QAAQhF,OAAR,IAAmBiF,sBAAoBvK,IAApB,CAAyBsK,QAAQhF,OAAjC,CAArB,CAAlB,EAAmF;UAC3E+E,cAAcC,QAAQD,WAA5B;QACEC,OAAF,EAAWE,QAAX,CAAoBC,CAApB;gBACUJ,WAAV;;;UAGItE,WAAN,CAAkB0E,CAAlB;UACMvH,MAAN;WACOxB,CAAP;;;SAGKA,CAAP;;;AC7BF,SAASwJ,aAAT,CAAqBxJ,CAArB,EAAwB;IACpB,KAAF,EAASG,IAAT,CAAc,UAACc,KAAD,EAAQ+H,GAAR,EAAgB;QACtBC,OAAOjJ,EAAEgJ,GAAF,CAAb;QACMG,cAAcF,KAAK3G,QAAL,CAAc4G,uBAAd,EAAmC5M,MAAnC,KAA8C,CAAlE;;QAEI6M,WAAJ,EAAiB;sBACDF,IAAd,EAAoBjJ,CAApB,EAAuB,GAAvB;;GALJ;;SASOA,CAAP;;;AAGF,SAASyJ,gBAAT,CAAsBzJ,CAAtB,EAAyB;IACrB,MAAF,EAAUG,IAAV,CAAe,UAACc,KAAD,EAAQoI,IAAR,EAAiB;QACxBC,QAAQtJ,EAAEqJ,IAAF,CAAd;QACMF,cAAcG,MAAMvF,OAAN,
CAAc,QAAd,EAAwBzH,MAAxB,KAAmC,CAAvD;QACI6M,WAAJ,EAAiB;sBACDG,KAAd,EAAqBtJ,CAArB,EAAwB,GAAxB;;GAJJ;;SAQOA,CAAP;CAGF;;AC7Be,SAASoJ,eAAT,CAAuB9I,KAAvB,EAA8BN,CAA9B,EAA4C;MAAXuH,GAAW,uEAAL,GAAK;;MACnDlH,OAAOC,MAAMqD,GAAN,CAAU,CAAV,CAAb;MACI,CAACtD,IAAL,EAAW;WACFL,CAAP;;;mBAEkBM,MAAMqD,GAAN,CAAU,CAAV,CALqC;MAKjD9C,OALiD,cAKjDA,OALiD;;MAMnD8I,eAAe,iBAAgB9I,OAAhB,EACQ3E,GADR,CACY;WAAU+K,GAAV,SAAiBpG,QAAQoG,GAAR,CAAjB;GADZ,EAEQ3J,IAFR,CAEa,GAFb,CAArB;;QAIM+G,WAAN,OAAsBkD,GAAtB,SAA6BoC,YAA7B,SAA6CrJ,MAAMe,QAAN,EAA7C,UAAkEkG,GAAlE;SACOvH,CAAP;;;ACTF,SAASgK,gBAAT,CAAwB7E,IAAxB,EAA8BnF,CAA9B,EAAiC;MACzB6J,SAASD,SAASzE,KAAK5E,IAAL,CAAU,QAAV,CAAT,EAA8B,EAA9B,CAAf;MACMkE,QAAQmF,SAASzE,KAAK5E,IAAL,CAAU,OAAV,CAAT,EAA6B,EAA7B,KAAoC,EAAlD;;;;;MAKI,CAACsJ,UAAU,EAAX,IAAiB,EAAjB,IAAuBpF,QAAQ,EAAnC,EAAuC;SAChCjD,MAAL;GADF,MAEO,IAAIqI,MAAJ,EAAY;;;;SAIZpJ,UAAL,CAAgB,QAAhB;;;SAGKT,CAAP;;;;;AAKF,SAASiK,eAAT,CAAuB9E,IAAvB,EAA6BnF,CAA7B,EAAgC;MAC1B8J,YAAUxL,IAAV,CAAe6G,KAAK5E,IAAL,CAAU,KAAV,CAAf,CAAJ,EAAsC;SAC/BiB,MAAL;;;SAGKxB,CAAP;CAGF;;AC1Be,SAAS2V,eAAT,CAAuBzL,OAAvB,EAAgClK,CAAhC,EAA8C;MAAXmK,IAAW,uEAAJ,EAAI;;MACvDA,KAAK7N,MAAL,KAAgB,CAApB,EAAuB;WACdmO,mBAAP;;;;;IAKAN,KAAK7M,IAAL,CAAU,GAAV,CAAF,EAAkB4M,OAAlB,EAA2BlC,GAA3B,OAAmCwC,YAAnC,EAAiDhJ,MAAjD;;;UAGMgJ,YAAN,EAAoBN,OAApB,EAA6BQ,WAA7B,CAAyCF,YAAzC;;SAEOxK,CAAP;;;ACfF,SAASyjB,uBAAT,CAA+B1Z,QAA/B,EAAyC;WAC9B3I,IAAT,CAAc,GAAd,EAAmBjB,IAAnB,CAAwB,UAACc,KAAD,EAAQZ,IAAR,EAAiB;SAClCQ,OAAL,GAAe,iBAAgBR,KAAKQ,OAArB,EAA8B+B,MAA9B,CAAqC,UAACC,GAAD,EAAMtC,IAAN,EAAe;UAC7DqK,qBAAmBtM,IAAnB,CAAwBiC,IAAxB,CAAJ,EAAmC;4BACrBsC,GAAZ,qBAAkBtC,IAAlB,EAAyBF,KAAKQ,OAAL,CAAaN,IAAb,CAAzB;;;aAGKsC,GAAP;KALa,EAMZ,EANY,CAAf;GADF;;SAUOkH,QAAP;CAGF;;AChBA;;;;;;AAMA,AAAO,IAAMnC,kCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C,EAe3C,MAf2C,EAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,
OAvB2C,EAwB3C,YAxB2C,EAyB3C,UAzB2C;AA0B3C,2BA1B2C;AA2B3C,OA3B2C,EA4B3C,eA5B2C,EA6B3C,SA7B2C,EA8B3C,QA9B2C,EA+B3C,QA/B2C,EAgC3C,KAhC2C,EAiC3C,OAjC2C,EAkC3C,UAlC2C,EAmC3C,SAnC2C,EAoC3C,UApC2C,EAqC3C,SArC2C,EAsC3C,OAtC2C,CAAtC;;;;;;;;;;;;;AAoDP,AAAO,IAAME,kCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMoB,wBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjC5L,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAMomB,6BAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,gCACX,IAAIP,MAAJ,QAAgBM,2BAAuBpmB,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;;;;AAMP,AAAO,IAAMsmB,8BAA0B,CACrC,CAAC,SAAD,EAAY,gBAAZ,CADqC,EAErC,CAAC,OAAD,EAAU,gBAAV,CAFqC,EAGrC,CAAC,QAAD,EAAW,gBAAX,CAHqC,EAIrC,CAAC,OAAD,EAAU,WAAV,CAJqC,EAKrC,CAAC,OAAD,EAAU,YAAV,CALqC,EAMrC,CAAC,OAAD,EAAU,YAAV,CANqC,CAAhC;;AASP,AAAO,IAAMC,kBAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,qBAAiB,IAAIV,MAAJ,CAAWS,gBAAYvmB,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAMP,AAAO,IAAMmK,yBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMiV,sBAAoB,IAAI0G,MAAJ,CAAW3b,uBAAqBnK,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAMymB,wBAAoB,IAAIX,MAAJ,CAAW,qBAAX,EAAkC,GAAlC,CAA1B;;;;;;AAMP,AAAO,IAAM1b,yBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,
EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAM+U,sBAAoB,IAAI2G,MAAJ,CAAW1b,uBAAqBpK,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,AAAM0mB;;;AAGb,AAAO,AAAMC;;;AAGb,AAAO,AAAMC;;;;AAIb,AAAO,AAAMvc;AAiDb,AAAO,AAAMkB,AAAsClB;;;;;;AAMnD,IAAME,wBAAsBD,gCAA8BtK,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAM+K,AAAkCR,AAAX;;AAEpC,IAAME,wBAAsBD,gCAA8BxK,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAM6K,AAAkCJ,AAAX;;AAEpC,AAAO,AAAMoc,AAA8Bpc,AAAhB,AAAyCF,AAAzC;;AAE3B,AAAO,IAAMuc,2BAAuB,IAAIhB,MAAJ,CAAW,mBAAX,EAAgC,GAAhC,CAA7B;AACP,AAAO,IAAMiB,yBAAqB,IAAIjB,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAA3B;AACP,AAAO,IAAMkB,eAAW,IAAIlB,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB,CAEP,AAAO,AAAMmB;;ACzSb;AACA,AAAe,SAASxX,WAAT,CAAmB1M,IAAnB,EAAyB;MAChC6H,UAAU7H,KAAKE,IAAL,CAAU,OAAV,CAAhB;MACM0H,KAAK5H,KAAKE,IAAL,CAAU,IAAV,CAAX;MACIkL,QAAQ,CAAZ;;MAEIxD,EAAJ,EAAQ;;QAEFyU,oBAAkBpe,IAAlB,CAAuB2J,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;QAEEwU,oBAAkBne,IAAlB,CAAuB2J,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;;;MAIAC,OAAJ,EAAa;QACPuD,UAAU,CAAd,EAAiB;;;UAGXiR,oBAAkBpe,IAAlB,CAAuB4J,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;UAEEuU,oBAAkBne,IAAlB,CAAuB4J,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;;;;;;QAOA4b,mBAAexlB,IAAf,CAAoB4J,OAApB,CAAJ,EAAkC;eACvB,EAAT;;;;;;;QAOE6b,sBAAkBzlB,IAAlB,CAAuB4J,OAAvB,CAAJ,EAAqC;eAC1B,EAAT;;;;SAIGuD,KAAP;;;ACnDF;;;AAGA,AAAe,SAASmB,UAAT,CAAkBtM,KAAlB,EAAyB;SAC/Bga,WAAWha,MAAMC,IAAN,CAAW,OAAX,CAAX,KAAmC,IAA1C;;;ACJF;AACA,AAAe,SAAS6L,aAAT,CAAqBpB,IAArB,EAA2B;SACjC,CAACA,KAAKxF,KAAL,CAAW,IAAX,KAAoB,EAArB,EAAyBlJ,MAAhC;;;ACFF,IAAMyP,UAAQ,IAAIqX,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAd;;AAEA,AAAe,SAAS/W,aAAT,CAAqBR,
UAArB,EAAgD;MAAfjI,OAAe,uEAAL,GAAK;;MACvDkI,SAASD,aAAa,EAA5B;;MAEIC,SAAS,CAAb,EAAgB;QACVE,oBAAJ;;;;;;;QAOID,QAAMzN,IAAN,CAAWsF,OAAX,CAAJ,EAAyB;oBACTkI,SAAS,CAAvB;KADF,MAEO;oBACSA,SAAS,IAAvB;;;WAGKI,KAAKD,GAAL,CAASC,KAAKC,GAAL,CAASH,WAAT,EAAsB,CAAtB,CAAT,EAAmC,CAAnC,CAAP;;;SAGK,CAAP;;;ACjBF;;AAEA,AAAe,SAASwY,iBAAT,CAAwBnkB,IAAxB,EAA8B;MACvCoL,QAAQ,CAAZ;MACMT,OAAO3K,KAAK2K,IAAL,GAAYC,IAAZ,EAAb;MACMY,aAAab,KAAK1O,MAAxB;;;MAGIuP,aAAa,EAAjB,EAAqB;WACZ,CAAP;;;;WAIOO,cAAYpB,IAAZ,CAAT;;;;WAISqB,cAAYR,UAAZ,CAAT;;;;;;MAMIb,KAAKsB,KAAL,CAAW,CAAC,CAAZ,MAAmB,GAAvB,EAA4B;aACjB,CAAT;;;SAGKb,KAAP;;;AChCa,SAASgB,UAAT,CAAkBnM,KAAlB,EAAyBN,CAAzB,EAA4ByL,KAA5B,EAAmC;QAC1ClL,IAAN,CAAW,OAAX,EAAoBkL,KAApB;SACOnL,KAAP;;;ACGa,SAASmkB,WAAT,CAAkBnkB,KAAlB,EAAyBN,CAAzB,EAA4BuM,MAA5B,EAAoC;MAC7C;QACId,QAAQiZ,kBAAepkB,KAAf,EAAsBN,CAAtB,IAA2BuM,MAAzC;eACSjM,KAAT,EAAgBN,CAAhB,EAAmByL,KAAnB;GAFF,CAGE,OAAOiB,CAAP,EAAU;;;;SAILpM,KAAP;;;ACXF;AACA,AAAe,SAASqkB,cAAT,CAAqBtkB,IAArB,EAA2BL,CAA3B,EAA8ByL,KAA9B,EAAqC;MAC5CX,SAASzK,KAAKyK,MAAL,EAAf;MACIA,MAAJ,EAAY;gBACDA,MAAT,EAAiB9K,CAAjB,EAAoByL,QAAQ,IAA5B;;;SAGKpL,IAAP;;;ACFF;;;AAGA,AAAe,SAASqkB,iBAAT,CAAwBpkB,KAAxB,EAA+BN,CAA/B,EAAsD;MAApB8M,WAAoB,uEAAN,IAAM;;MAC/DrB,QAAQmB,WAAStM,KAAT,CAAZ;;MAEImL,KAAJ,EAAW;WACFA,KAAP;;;UAGMmZ,aAAUtkB,KAAV,CAAR;;MAEIwM,WAAJ,EAAiB;aACNC,YAAUzM,KAAV,CAAT;;;iBAGUA,KAAZ,EAAmBN,CAAnB,EAAsByL,KAAtB;;SAEOA,KAAP;;;AClBF;;AAEA,AAAe,SAASmZ,YAAT,CAAmBtkB,KAAnB,EAA0B;mBACnBA,MAAMqD,GAAN,CAAU,CAAV,CADmB;MAC/BC,OAD+B,cAC/BA,OAD+B;;;;;;;MAMnCwgB,yBAAqB9lB,IAArB,CAA0BsF,OAA1B,CAAJ,EAAwC;WAC/B4gB,kBAAelkB,KAAf,CAAP;GADF,MAEO,IAAIsD,YAAY,KAAhB,EAAuB;WACrB,CAAP;GADK,MAEA,IAAIygB,uBAAmB/lB,IAAnB,CAAwBsF,OAAxB,CAAJ,EAAsC;WACpC,CAAP;GADK,MAEA,IAAI0gB,aAAShmB,IAAT,CAAcsF,OAAd,CAAJ,EAA4B;WAC1B,CAAC,CAAR;GADK,MAEA,IAAIA,YAAY,IAAhB,EAAsB;WACpB,CAAC,CAAR;;;SAGK,CAAP;;;ACjBF,SAAS6F,cAAT,CAAsBnJ,KAAtB,EAA6BN,CAA7B,EAAgC;MAC1BM,MAAMqD,GAAN,CAAU,CAAV,CAAJ,EAAkB;qBACIrD,MAAMqD,GAAN,CAAU,CAAV,CADJ;QACRC,OADQ,cACRA,OADQ;;QAGZA,YAAY,MAAhB,EAAwB;;sBAERtD,
KAAd,EAAqBN,CAArB,EAAwB,KAAxB;;;;;AAKN,SAASqN,YAAT,CAAoB/M,KAApB,EAA2BN,CAA3B,EAA8ByL,KAA9B,EAAqC;MAC/BnL,KAAJ,EAAW;mBACIA,KAAb,EAAoBN,CAApB;gBACSM,KAAT,EAAgBN,CAAhB,EAAmByL,KAAnB;;;;AAIJ,SAASkC,SAAT,CAAiB3N,CAAjB,EAAoB8M,WAApB,EAAiC;IAC7B,QAAF,EAAY9E,GAAZ,CAAgB,SAAhB,EAA2B7H,IAA3B,CAAgC,UAACc,KAAD,EAAQZ,IAAR,EAAiB;;;QAG3CC,QAAQN,EAAEK,IAAF,CAAZ;YACQoM,WAASnM,KAAT,EAAgBN,CAAhB,EAAmB0kB,kBAAepkB,KAAf,EAAsBN,CAAtB,EAAyB8M,WAAzB,CAAnB,CAAR;;QAEM9I,UAAU1D,MAAMwK,MAAN,EAAhB;QACMwC,WAAWsX,aAAUtkB,KAAV,CAAjB;;iBAEW0D,OAAX,EAAoBhE,CAApB,EAAuBsN,QAAvB,EAAiCR,WAAjC;QACI9I,OAAJ,EAAa;;;mBAGAA,QAAQ8G,MAAR,EAAX,EAA6B9K,CAA7B,EAAgCsN,WAAW,CAA3C,EAA8CR,WAA9C;;GAbJ;;SAiBO9M,CAAP;CAGF;;ACjDA,IAAM4N,iBAAe,SAArB;;AAEA,AAAe,SAAS8C,iBAAT,CAAyB1F,IAAzB,EAA+B;SACrCA,KAAKxG,OAAL,CAAaoJ,cAAb,EAA2B,GAA3B,EAAgC3C,IAAhC,EAAP;;;ACHF;;;;0CAKA;;ACLA;;;;;;;;;;;;;;;;AAgBA,AAAO,IAAMgD,oBAAkB,IAAImV,MAAJ,CAAW,0EAAX,EAAuF,GAAvF,CAAxB;;AAEP,AAAO,IAAMjU,iBAAe,QAArB;;AAEP,AAAO,IAAMD,gBAAc,WAApB;AACP,AAAO,IAAMZ,gBAAc,WAApB;;ACbP,SAASc,eAAT,CAAuBhB,OAAvB,EAAgCnN,KAAhC,EAAuCoN,sBAAvC,EAA+D;MACzDE,cAAc,IAAlB;;;;MAIItN,QAAQ,CAAR,IAAaqN,cAAYhQ,IAAZ,CAAiB8P,OAAjB,CAAb,IAA0CA,QAAQ9R,MAAR,GAAiB,CAA/D,EAAkE;kBAClD,IAAd;;;;;MAKE2E,UAAU,CAAV,IAAemN,QAAQI,WAAR,OAA0B,OAA7C,EAAsD;kBACtC,KAAd;;;;;MAKEvN,QAAQ,CAAR,IAAamN,QAAQ9R,MAAR,GAAiB,CAA9B,IAAmC,CAAC+R,sBAAxC,EAAgE;kBAChD,KAAd;;;SAGKE,WAAP;CAGF;;AChCA;;AAEA,IAAMsW,oBAAkB,IAAIzB,MAAJ,CAAW,QAAX,CAAxB;AACA,AAAe,SAAS/S,gBAAT,CAAwBrF,IAAxB,EAA8B;SACpC6Z,kBAAgBvmB,IAAhB,CAAqB0M,IAArB,CAAP;;;ACKF;;;;;AAKA,AAAe,SAASuF,eAAT,CAAuBf,UAAvB,EAAmCC,QAAnC,EAA6CzP,CAA7C,EAAgD;MACzD,CAACwP,WAAW1E,MAAX,GAAoBxO,MAAzB,EAAiC;WACxBkT,UAAP;;;MAGIU,wBAAwBhE,KAAKC,GAAL,CAAS,EAAT,EAAasD,WAAW,IAAxB,CAA9B;MACMI,cAAc7P,EAAE,aAAF,CAApB;;aAEW8K,MAAX,GAAoBxI,QAApB,GAA+BnC,IAA/B,CAAoC,UAACc,KAAD,EAAQ2H,OAAR,EAAoB;QAChD+G,WAAW3P,EAAE4I,OAAF,CAAjB;;QAEI+a,8BAA0BrlB,IAA1B,CAA+BsK,QAAQhF,OAAvC,CAAJ,EAAqD;aAC5C,IAAP;;;QAGIgM,eAAehD,WAAS+C,QAAT,CAArB;QACIC,YAAJ,EAAkB;UACZD,aAAaH,UAAjB,EAA6B;oBACfrL,MAAZ,C
AAmBwL,QAAnB;OADF,MAEO;YACDK,eAAe,CAAnB;YACMD,UAAUD,cAAYH,QAAZ,CAAhB;;;;YAIII,UAAU,IAAd,EAAoB;0BACF,EAAhB;;;;;YAKEA,WAAW,GAAf,EAAoB;0BACF,EAAhB;;;;;YAKEJ,SAASpP,IAAT,CAAc,OAAd,MAA2BiP,WAAWjP,IAAX,CAAgB,OAAhB,CAA/B,EAAyD;0BACvCkP,WAAW,GAA3B;;;YAGIQ,WAAWL,eAAeI,YAAhC;;YAEIC,YAAYC,qBAAhB,EAAuC;iBAC9BL,YAAY1L,MAAZ,CAAmBwL,QAAnB,CAAP;SADF,MAEO,IAAI/G,QAAQhF,OAAR,KAAoB,GAAxB,EAA6B;cAC5BuM,iBAAiBR,SAAS3E,IAAT,EAAvB;cACMoF,uBAAuBvE,aAAWsE,cAAX,CAA7B;;cAEIC,uBAAuB,EAAvB,IAA6BL,UAAU,IAA3C,EAAiD;mBACxCF,YAAY1L,MAAZ,CAAmBwL,QAAnB,CAAP;WADF,MAEO,IAAIS,wBAAwB,EAAxB,IAA8BL,YAAY,CAA1C,IACDM,iBAAeF,cAAf,CADH,EACmC;mBACjCN,YAAY1L,MAAZ,CAAmBwL,QAAnB,CAAP;;;;;;WAMD,IAAP;GAnDF;;SAsDOE,WAAP;;;AC5EF,UACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACDA,SAASqB,qBAAT,CAA6B5Q,KAA7B,EAAoCN,CAApC,EAAuCwQ,MAAvC,EAA+C;;;;;MAKzClQ,MAAMmQ,QAAN,CAAe,qBAAf,CAAJ,EAA2C;;;;MAIrCrO,UAAUsO,kBAAgBpQ,MAAM0K,IAAN,EAAhB,CAAhB;;MAEIoB,cAAYhK,OAAZ,IAAuB,EAA3B,EAA+B;QACvBwO,SAAS5Q,EAAE,GAAF,EAAOM,KAAP,EAAchE,MAA7B;QACMqU,aAAa3Q,EAAE,OAAF,EAAWM,KAAX,EAAkBhE,MAArC;;;QAGIqU,aAAcC,SAAS,CAA3B,EAA+B;YACvBpP,MAAN;;;;QAIIpD,gBAAgBgE,QAAQ9F,MAA9B;QACMuU,WAAW7Q,EAAE,KAAF,EAASM,KAAT,EAAgBhE,MAAjC;;;;QAII8B,gBAAgB,EAAhB,IAAsByS,aAAa,CAAvC,EAA0C;YAClCrP,MAAN;;;;QAIIuO,UAAUD,cAAYxP,KAAZ,CAAhB;;;;;QAKIkQ,SAAS,EAAT,IAAeT,UAAU,GAAzB,IAAgC3R,gBAAgB,EAApD,EAAwD;YAChDoD,MAAN;;;;;;QAMEgP,UAAU,EAAV,IAAgBT,UAAU,GAA9B,EAAmC;;;;UAI3BnM,UAAUtD,MAAMqD,GAAN,CAAU,CAAV,EAAaC,OAA7B;UACMkN,aAAalN,YAAY,IAAZ,IAAoBA,YAAY,IAAnD;UACIkN,UAAJ,EAAgB;YACRC,eAAezQ,MAAM5D,IAAN,EAArB;YACIqU,gBAAgBL,kBAAgBK,aAAa/F,IAAb,EAAhB,EAAqCsB,KAArC,CAA2C,CAAC,CAA5C,MAAmD,GAAvE,EAA4E;;;;;YAKxE9K,MAAN;;;;QAIIwP,cAAchR,EAAE,QAAF,EAAYM,KAAZ,EAAmBhE,MAAvC;;;QAGI0U,cAAc,CAAd,IAAmB5S,gBAAgB,GAAvC,EAA4C;YACpCoD,MAAN;;;;CAMN;;AC/EA,SAASsjB,YAAT,CAAoB9kB,CAApB,EAAuBuR,OAAvB,EAAgChR,IAAhC,EAAsCiR,QAAtC,EAAgD;UACxCjR,IAAN,QAAeiR,QAAf,EAAyBrR,IAAzB,CAA8B,UAACC,CAAD,EAAIC,IAAJ,EAAa;QACnC5B,MAAM4B,KAAKQ,OAAL,CAAaN,IAAb,CAAZ;QACMkR,cAAc7S,IAAIpB,OAAJ,CAAY+T,OAAZ,EAAqB9
S,GAArB,CAApB;;SAEKoC,OAAL,CAAaN,IAAb,IAAqBkR,WAArB;GAJF;;;AAQF,AAAe,SAASqE,mBAAT,CAA2BtE,QAA3B,EAAqCxR,CAArC,EAAwCvB,GAAxC,EAA6C;GACzD,MAAD,EAAS,KAAT,EAAgBqC,OAAhB,CAAwB;WAAQgkB,aAAW9kB,CAAX,EAAcvB,GAAd,EAAmB8B,IAAnB,EAAyBiR,QAAzB,CAAR;GAAxB;;SAEOA,QAAP;;;ACdK,SAAS3F,YAAT,CAAoBb,IAApB,EAA0B;SACxBA,KAAKC,IAAL,GACKzG,OADL,CACa,MADb,EACqB,GADrB,EAEKlI,MAFZ;;;;;;AAQF,AAAO,SAASwT,aAAT,CAAqBxP,KAArB,EAA4B;MAC3BqR,kBAAkB9F,aAAWvL,MAAM0K,IAAN,EAAX,CAAxB;;MAEM0G,WAAWpR,MAAMc,IAAN,CAAW,GAAX,EAAgB4J,IAAhB,EAAjB;MACM4G,aAAa/F,aAAW6F,QAAX,CAAnB;;MAEIC,kBAAkB,CAAtB,EAAyB;WAChBC,aAAaD,eAApB;GADF,MAEO,IAAIA,oBAAoB,CAApB,IAAyBC,aAAa,CAA1C,EAA6C;WAC3C,CAAP;;;SAGK,CAAP;;;ACnBF,SAAS0B,YAAT,CAAoBhT,KAApB,EAA2B8S,WAA3B,EAAwC;;;MAGlC9S,MAAMgC,QAAN,GAAiBhG,MAAjB,GAA0B8W,WAA9B,EAA2C;WAClC,KAAP;;;MAGEC,gBAAc/S,KAAd,CAAJ,EAA0B;WACjB,KAAP;;;SAGK,IAAP;CAGF;;AChBA;AACA,AAAe,SAASiS,WAAT,CAAmBvH,IAAnB,EAAyBhL,CAAzB,EAA4B;;;MAGnCyT,YAAYzT,aAAWgL,IAAX,cAA0BA,IAA1B,EAAlB;SACOyI,cAAc,EAAd,GAAmBzI,IAAnB,GAA0ByI,SAAjC;;;ACLa,SAASJ,eAAT,CAAuB/S,KAAvB,EAA8B;MACrCyD,UAAUzD,MAAMyD,OAAN,GAAgBoO,OAAhB,EAAhB;MACM4S,gBAAgBhhB,QAAQ3C,IAAR,CAAa,UAAC0J,MAAD,EAAY;QACvC1C,aAAgB0C,OAAOjK,OAAP,CAAe6S,KAA/B,SAAwC5I,OAAOjK,OAAP,CAAeoH,EAA7D;WACOG,WAAWnG,QAAX,CAAoB,SAApB,CAAP;GAFoB,CAAtB;;SAKO8iB,kBAAkBxoB,SAAzB;;;ACPF;;kBAIA;;ACJA,mBACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACtBe,SAASyoB,YAAT,CAAsBC,OAAtB,EAA0C;oCAAR7S,MAAQ;UAAA;;;MACnDA,OAAO9V,MAAX,EAAmB;WACV2oB,QAAQriB,MAAR,CAAe,UAACd,MAAD,EAASojB,IAAT,EAAeC,GAAf,EAAuB;UACvC3kB,QAAQ4R,OAAO+S,GAAP,CAAZ;;UAEI3kB,SAAS,OAAOA,MAAM4kB,QAAb,KAA0B,UAAvC,EAAmD;gBACzC5kB,MAAM4kB,QAAN,EAAR;OADF,MAEO;gBACG,EAAR;;;aAGKtjB,SAASojB,IAAT,GAAgB1kB,KAAvB;KATK,EAUJ,EAVI,CAAP;;;SAaKykB,QAAQ3nB,IAAR,CAAa,EAAb,CAAP;;;ACbF,IAAM+nB,cAAc,sBAApB;AACA,IAAMC,qBAAqB,MAA3B;;AAEA,AAAe,SAASC,QAAT,CAAkBN,OAAlB,EAAsC;oCAAR7S,MAAQ;UAAA;;;MAC7CoT,WAAWR,+BAAaC,OAAb,SAAyB7S,MAAzB,EAAjB;;aACaoT,SAAShgB,KAAT,CAAe6f,WAAf,KAA+B,EAFO;;MAE9Cxn
B,IAF8C;;MAG/C4nB,cAAc,gBAAlB;;MAEI,CAAC5nB,IAAL,EAAW;WACF2nB,QAAP;kBACc,gBAAd;;;SAGK3nB,KAAKiH,KAAL,CAAW,IAAX,EACJwH,KADI,CACE,CADF,EAEJpQ,GAFI,CAEA,UAACwpB,IAAD,EAAU;WACNA,KAAKlhB,OAAL,CAAaihB,WAAb,EAA0B,IAA1B,CAAP;;QAEIH,mBAAmBhnB,IAAnB,CAAwBonB,IAAxB,CAAJ,EAAmC;aAC1BA,KAAKlhB,OAAL,CAAa8gB,kBAAb,EAAiC,EAAjC,CAAP;;;WAGKI,IAAP;GATG,EAWJpoB,IAXI,CAWC,IAXD,CAAP;;;;;ACfF,AAEA,wBAAe,UAAUL,QAAV,EAAoB8U,IAApB,EAA0B;SAChCwT,QAAP,kBACiBxT,IADjB,EAEe9U,QAFf;;;;;;ACHF,AAEA,IAAM0oB,SAAS,CACb,KADa,EAEb,QAFa,EAGb,SAHa,EAIb,YAJa,EAKb,eALa,EAMb,SANa,EAOb,WAPa,EAQb,aARa,EASb,gBATa,CAAf;;AAYA,SAASC,OAAT,CAAiB3e,GAAjB,EAAsBzG,KAAtB,EAA6BqlB,GAA7B,EAAkCC,IAAlC,EAAwCrnB,GAAxC,EAA6C;MACvCknB,OAAOvkB,IAAP,CAAY;WAAKmX,MAAMtR,GAAX;GAAZ,CAAJ,EAAiC,OAAO,EAAP;;SAE1Bse,QAAP,oBACkBte,GADlB,EAE+CA,GAF/C,EAGkB4e,GAHlB,EAK+BC,IAL/B,EAOernB,GAPf,EASoBwI,GATpB,EAcyBA,GAdzB,EAciCzG,QAAQ,MAAMA,KAAN,GAAc,GAAtB,GAA4B,IAd7D;;;AAmBF,4BAAe,UAAUslB,IAAV,EAAgBrnB,GAAhB,EAAqBonB,GAArB,EAA0B/jB,MAA1B,EAAkCiQ,IAAlC,EAAwC;SAC9CwT,QAAP,mBAUcxT,IAVd,EAgBWtT,GAhBX,EAqBQ,iBAAgBqD,MAAhB,EAAwB5F,GAAxB,CAA4B;WAAK0pB,QAAQrN,CAAR,EAAWzW,OAAOyW,CAAP,CAAX,EAAsBsN,GAAtB,EAA2BC,IAA3B,EAAiCrnB,GAAjC,CAAL;GAA5B,EAAwEnB,IAAxE,CAA6E,MAA7E,CArBR,EAyBcuoB,GAzBd,EA6B2BC,IA7B3B,EA+BWrnB,GA/BX;;;ACvBF,IAAMsnB,YAAY,CAChB;QACQ,OADR;QAEQ,SAFR;WAGW,yEAHX;UAAA,oBAIWvlB,KAJX,EAIkB;qBACO5B,IAAIC,KAAJ,CAAU2B,KAAV,CADP;QACNvD,QADM,cACNA,QADM;;QAEVA,QAAJ,EAAc,OAAO,IAAP;;WAEP,KAAP;;CATY,CAAlB;;AAcA+oB,SAASC,MAAT,CAAgBF,SAAhB,EAA2BG,IAA3B,CAAgC,UAACC,OAAD,EAAa;uBACtBA,QAAQC,OAA7B;CADF;;AAIA,IAAIC,gBAAJ;AACA,SAASC,OAAT,CAAiBC,EAAjB,EAAqBC,IAArB,EAA2BC,GAA3B,EAAgCC,SAAhC,EAA2C;YAC/BC,IAAI,EAAE3b,MAAMyb,GAAR,EAAJ,CAAV;UACQ7pB,KAAR;MACMkF,SAASykB,GAAGxmB,KAAH,CAAS,IAAT,EAAeymB,IAAf,CAAf;;MAEI1kB,UAAUA,OAAOokB,IAArB,EAA2B;WAClBA,IAAP,CAAY;aAAKU,SAASC,CAAT,EAAYL,IAAZ,EAAkBE,SAAlB,CAAL;KAAZ;GADF,MAEO;YACGI,OAAR;;;SAGKhlB,MAAP;;;AAGF,SAAS8kB,QAAT,CAAkB5mB,CAAlB,QAA4B0mB,SAA5B,EAAuC;;MAAjBjoB,GAAiB;;oBAChBG,IAAIC,KAAJ,CAAUJ,GAAV,CADgB;MAC7BxB,QAD6B,eAC7
BA,QAD6B;;UAG7B6pB,OAAR;;MAEMC,WAAW,IAAIC,IAAJ,GAAWC,OAAX,EAAjB;MACMnB,uBAAqB7oB,QAArB,SAAiC8pB,QAAjC,UAAN;;sBAEkB/mB,EAAE,GAAF,EAAOsQ,KAAP,EAAlB,EAAkCtQ,CAAlC,EAAqCvB,GAArC;IACE,eAAF,EAAmB0B,IAAnB,CAAwB,UAACc,KAAD,EAAQZ,IAAR,EAAiB;QACjCC,QAAQN,EAAEK,IAAF,CAAd;QACMqd,OAAOpd,MAAMC,IAAN,CAAW,KAAX,CAAb;QACImd,QAAQA,KAAKpR,KAAL,CAAW,CAAX,EAAc,CAAd,MAAqB,IAAjC,EAAuC;YAC/B/L,IAAN,CAAW,KAAX,YAA0Bmd,IAA1B;;GAJJ;MAOMlK,OAAOmC,gBAAc3V,EAAE,GAAF,EAAOsQ,KAAP,EAAd,EAA8BtQ,CAA9B,EAAiC,CAAC,QAAD,CAAjC,EAA6CwT,IAA7C,EAAb;;KAEG0T,aAAH,CAAiBpB,IAAjB,EAAuBtS,IAAvB;;MAEM1R,SAASqhB,QAAQtkB,KAAR,CAAcJ,GAAd,EAAmB+U,IAAnB,EAAyB0S,IAAzB,CAA8B,UAACpkB,MAAD,EAAY;QACnD4kB,SAAJ,EAAe;cACLS,gBAAR,EAA0B,CAAC1oB,GAAD,EAAMqnB,IAAN,EAAYhkB,MAAZ,CAA1B,EAA+C,6BAA/C;cACQslB,GAAR,4GACqBnqB,QADrB,wDAGwBA,QAHxB;KAFF,MAMO;cACGmqB,GAAR,mHAEuCtB,IAFvC,iHAI4BA,IAJ5B;;GARW,CAAf;;;AAiBF,SAASqB,gBAAT,CAA0B1oB,GAA1B,EAA+BqnB,IAA/B,EAAqChkB,MAArC,EAA6C;oBACtBlD,IAAIC,KAAJ,CAAUJ,GAAV,CADsB;MACnCxB,QADmC,eACnCA,QADmC;;MAErCyF,YAAY2kB,kBAAkBpqB,QAAlB,EAA4BqqB,cAAcrqB,QAAd,CAA5B,CAAlB;MACMsqB,gBAAgBC,sBAAsB1B,IAAtB,EAA4BrnB,GAA5B,EAAiCgpB,OAAOhpB,GAAP,CAAjC,EAA8CqD,MAA9C,EAAsDwlB,cAAcrqB,QAAd,CAAtD,CAAtB;;KAEGiqB,aAAH,CAAoBO,OAAOhpB,GAAP,CAApB,gBAA4CiE,SAA5C;KACGwkB,aAAH,CAAoBO,OAAOhpB,GAAP,CAApB,qBAAiD8oB,aAAjD;KACGG,cAAH,CACE,kCADF,EAEEC,aAAalpB,GAAb,CAFF;oDAIkCgpB,OAAOhpB,GAAP,CAAlC;;;AAGF,SAAS6oB,aAAT,CAAuBrqB,QAAvB,EAAiC;MACzB8U,OAAO9U,SACV6H,KADU,CACJ,GADI,EAEV5I,GAFU,CAEN;gBAAQ0rB,EAAEC,MAAF,CAAS,CAAT,EAAYC,WAAZ,EAAR,GAAoCF,EAAEtb,KAAF,CAAQ,CAAR,CAApC;GAFM,EAGVhP,IAHU,CAGL,EAHK,CAAb;SAIUyU,IAAV;;;AAGF,SAAS4V,YAAT,CAAsBlpB,GAAtB,EAA2B;oBACJG,IAAIC,KAAJ,CAAUJ,GAAV,CADI;MACjBxB,QADiB,eACjBA,QADiB;;gCAEEA,QAA3B;;;AAGF,SAAS8qB,gBAAT,CAA0BlC,GAA1B,EAA+BY,GAA/B,EAAoC;MAC9B,CAACuB,GAAGC,UAAH,CAAcpC,GAAd,CAAL,EAAyB;YACfmC,GAAGE,SAAX,EAAsB,CAACrC,GAAD,CAAtB,EAA6BY,GAA7B;;;;AAIJ,SAAS0B,oBAAT,CAA8B1pB,GAA9B,EAAmC;MAC3BonB,MAAM4B,OAAOhpB,GAAP,CAAZ;;oBACqBG,IAAIC,KAAJ,CAAUJ,GAAV,CAFY;MAEzBxB,QAFyB,eAEzBA,QAFyB;;MAG7BypB,YAAY,
KAAhB;;MAEI,CAACsB,GAAGC,UAAH,CAAcpC,GAAd,CAAL,EAAyB;gBACX,IAAZ;qBACiBA,GAAjB,gBAAkC5oB,QAAlC;qCAC+BA,QAA/B,EAA2C,6BAA3C;;;UAGQkmB,QAAQJ,aAAhB,EAA+B,CAACtkB,GAAD,CAA/B,EAAsC,kBAAtC,EAA0DioB,SAA1D;;;AAGJ,SAASe,MAAT,CAAgBhpB,GAAhB,EAAqB;oBACEG,IAAIC,KAAJ,CAAUJ,GAAV,CADF;MACXxB,QADW,eACXA,QADW;;sCAEeA,QAAlC;"}
\ No newline at end of file
+{"version":3,"file":null,"sources":["../src/utils/dom/constants.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-attributes.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/article-base-url.js","../src/utils/text/has-sentence-end.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/index.js","../src/utils/dom/clean-tags.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/get-attrs.js","../src/utils/dom/set-attr.js","../src/utils/dom/set-attrs.js","../src/utils/dom/index.js","mercury.js","../scripts/templates/insert-values.js","../scripts/templates/index.js","../scripts/templates/custom-extractor.js","../scripts/templates/custom-extractor-test.js","../scripts/generate-custom-parser.js"],"sourcesContent":["// Spacer images to be 
removed\nexport const SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nexport const KEEP_CLASS = 'mercury-parser-keep';\n\nexport const KEEP_SELECTORS = [\n 'iframe[src^=\"https://www.youtube.com\"]',\n 'iframe[src^=\"http://www.youtube.com\"]',\n 'iframe[src^=\"https://player.vimeo\"]',\n 'iframe[src^=\"http://player.vimeo\"]',\n];\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nexport const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(tag => `${tag}:empty`).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. 
Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. 
Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. 
- AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const $element = $(element);\n const nextElement = $element.next().get(0);\n\n if (nextElement && nextElement.tagName.toLowerCase() === 'br') {\n collapsing = true;\n $element.remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = 
$(span);\n const convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const attrs = getAttrs(node) || {};\n // console.log(attrs)\n\n const attribString = Reflect.ownKeys(attrs)\n .map(key => `${key}=${attrs[key]}`)\n .join(' ');\n let html;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html = node.tagName.toLowerCase() === 'noscript' ? 
$node.text() : $node.html();\n } else {\n html = $node.contents();\n }\n $node.replaceWith(\n `<${tag} ${attribString}>${html}${tag}>`\n );\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not(`.${KEEP_CLASS}`).remove();\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n","import {\n getAttrs,\n setAttrs,\n} from 'utils/dom';\n\nimport { WHITELIST_ATTRS_RE } from './constants';\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each((index, node) => {\n const attrs = getAttrs(node);\n\n setAttrs(node, Reflect.ownKeys(attrs).reduce((acc, attr) => {\n 
if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: attrs[attr] };\n }\n\n return acc;\n }, {}));\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ?\n $article.parent() : $article\n );\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". 
It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. 
Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) 
{\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n // console.log(\"NODE\", $node, $node.attr('score'))\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName.toLowerCase() === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName.toLowerCase() === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","const NORMALIZE_RE = /\\s{2,}/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n // const matchRe = null\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n","import URL from 'url';\n\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, 
firstSegmentHasLetters) {\n let goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path.split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. 
The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) 
Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling.get(0) === $candidate.get(0)) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n if (wrappingDiv.children().length === 1 &&\n wrappingDiv.children().first().get(0) === $candidate.get(0)) {\n return $candidate;\n }\n\n return wrappingDiv;\n}\n","// Scoring\nexport { default as getWeight } from './get-weight';\nexport { default as getScore } from './get-score';\nexport { default as scoreCommas } 
from './score-commas';\nexport { default as scoreLength } from './score-length';\nexport { default as scoreParagraph } from './score-paragraph';\nexport { default as setScore } from './set-score';\nexport { default as addScore } from './add-score';\nexport { default as addToParent } from './add-to-parent';\nexport { default as getOrInitScore } from './get-or-init-score';\nexport { default as scoreNode } from './score-node';\nexport { default as scoreContent } from './score-content';\nexport { default as findTopCandidate } from './find-top-candidate';\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. 
We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName.toLowerCase();\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","/* eslint-disable */\nimport URL from 'url';\n\nimport {\n getAttrs,\n setAttr,\n} from 'utils/dom';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const attrs = getAttrs(node);\n const url = attrs[attr];\n\n if (url) {\n const absoluteUrl = URL.resolve(rootUrl, url);\n setAttr(node, attr, absoluteUrl);\n }\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. 
Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`${text}`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const attrs = getAttrs(parent);\n const { class: nodeClass, id } = attrs;\n const classAndId = `${nodeClass} ${id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","export default function getAttrs(node) {\n const { attribs, attributes } = node;\n\n if (!attribs && attributes) {\n const attrs = Reflect.ownKeys(attributes).reduce((acc, index) => {\n const attr = attributes[index];\n\n if (!attr.name || !attr.value) return acc;\n\n acc[attr.name] = attr.value;\n return acc;\n }, {});\n return attrs;\n }\n\n return attribs;\n}\n","export default function setAttr(node, attr, val) {\n if (node.attribs) {\n node.attribs[attr] = val;\n } else if (node.attributes) {\n node.setAttribute(attr, val);\n }\n\n return node;\n}\n","/* eslint-disable */\nexport default function setAttrs(node, attrs) {\n if (node.attribs) {\n node.attribs = attrs;\n } else if (node.attributes) {\n while(node.attributes.length > 0)\n node.removeAttribute(node.attributes[0].name);\n\n Reflect.ownKeys(attrs).forEach(key => {\n node.setAttribute(key, attrs[key]);\n })\n }\n\n return node;\n}\n","// DOM manipulation\nexport { default as stripUnlikelyCandidates } from './strip-unlikely-candidates';\nexport { default as brsToPs } from './brs-to-ps';\nexport { default as paragraphize } from './paragraphize';\nexport { default as convertToParagraphs } from './convert-to-paragraphs';\nexport { default as convertNodeTo } from './convert-node-to';\nexport { default as cleanImages } from './clean-images';\nexport { default as markToKeep } 
from './mark-to-keep';\nexport { default as stripJunkTags } from './strip-junk-tags';\nexport { default as cleanHOnes } from './clean-h-ones';\nexport { default as cleanAttributes } from './clean-attributes';\nexport { default as removeEmpty } from './remove-empty';\nexport { default as cleanTags } from './clean-tags';\nexport { default as cleanHeaders } from './clean-headers';\nexport { default as rewriteTopLevel } from './rewrite-top-level';\nexport { default as makeLinksAbsolute } from './make-links-absolute';\nexport { textLength, linkDensity } from './link-density';\nexport { default as extractFromMeta } from './extract-from-meta';\nexport { default as extractFromSelectors } from './extract-from-selectors';\nexport { default as stripTags } from './strip-tags';\nexport { default as withinComment } from './within-comment';\nexport { default as nodeIsSufficient } from './node-is-sufficient';\nexport { default as isWordpress } from './is-wordpress';\nexport { default as getAttrs } from './get-attrs';\nexport { default as setAttr } from './set-attr';\nexport { default as setAttrs } from './set-attrs';\n","'use strict';\n\nfunction _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? 
ex['default'] : ex; }\n\nvar _regeneratorRuntime = _interopDefault(require('babel-runtime/regenerator'));\nvar _extends = _interopDefault(require('babel-runtime/helpers/extends'));\nvar _asyncToGenerator = _interopDefault(require('babel-runtime/helpers/asyncToGenerator'));\nvar URL = _interopDefault(require('url'));\nvar cheerio = _interopDefault(require('cheerio'));\nvar _Promise = _interopDefault(require('babel-runtime/core-js/promise'));\nvar request = _interopDefault(require('request'));\nvar _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));\nvar _toConsumableArray = _interopDefault(require('babel-runtime/helpers/toConsumableArray'));\nvar _defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));\nvar _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));\nvar _typeof = _interopDefault(require('babel-runtime/helpers/typeof'));\nvar _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));\nvar _Object$keys = _interopDefault(require('babel-runtime/core-js/object/keys'));\nvar stringDirection = _interopDefault(require('string-direction'));\nvar validUrl = _interopDefault(require('valid-url'));\nvar moment = _interopDefault(require('moment'));\nvar wuzzy = _interopDefault(require('wuzzy'));\nvar difflib = _interopDefault(require('difflib'));\nvar _Array$from = _interopDefault(require('babel-runtime/core-js/array/from'));\nvar ellipsize = _interopDefault(require('ellipsize'));\n\nvar _marked = [range].map(_regeneratorRuntime.mark);\n\nfunction range() {\n var start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;\n var end = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : 1;\n return _regeneratorRuntime.wrap(function range$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n if (!(start <= end)) {\n _context.next = 5;\n break;\n }\n\n _context.next = 3;\n return start += 1;\n\n case 3:\n _context.next = 0;\n break;\n\n case 5:\n case \"end\":\n return _context.stop();\n }\n }\n }, _marked[0], this);\n}\n\n// extremely simple url validation as a first step\nfunction validateUrl(_ref) {\n var hostname = _ref.hostname;\n\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n\nvar Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.'\n }\n};\n\nvar REQUEST_HEADERS = {\n 'User-Agent': 'Readability - http://readability.com/about/'\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nvar FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nvar BAD_CONTENT_TYPES = ['audio/mpeg', 'image/gif', 'image/jpeg', 'image/jpg'];\n\nvar BAD_CONTENT_TYPES_RE = new RegExp('^(' + BAD_CONTENT_TYPES.join('|') + ')$', 'i');\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. Defaults to 5 MB.\nvar MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\n\nfunction get(options) {\n // eslint-disable-line\n return new _Promise(function (resolve, reject) {\n request(options, function (err, response, body) {\n if (err) {\n reject(err);\n } else {\n resolve({ body: body, response: response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. 
Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nfunction validateResponse(response) {\n var parseNon2xx = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false;\n\n // Check if we got a valid status code\n // This isn't great, but I'm requiring a statusMessage to be set\n // before short circuiting b/c nock doesn't set it in tests\n // statusMessage only not set in nock response, in which case\n // I check statusCode, which is currently only 200 for OK responses\n // in tests\n if (response.statusMessage && response.statusMessage !== 'OK' || response.statusCode !== 200) {\n if (!response.statusCode) {\n throw new Error('Unable to fetch content. Original exception was ' + response.error);\n } else if (!parseNon2xx) {\n throw new Error('Resource returned a response status code of ' + response.statusCode + ' and resource was instructed to reject non-2xx level status codes.');\n }\n }\n\n var _response$headers = response.headers,\n contentType = _response$headers['content-type'],\n contentLength = _response$headers['content-length'];\n\n // Check that the content is not in BAD_CONTENT_TYPES\n\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error('Content-type for this resource was ' + contentType + ' and is not allowed.');\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error('Content for this resource was too large. Maximum content length is ' + MAX_CONTENT_LENGTH + '.');\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\n\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nvar fetchResource$1 = (function () {\n var _ref2 = _asyncToGenerator(_regeneratorRuntime.mark(function _callee(url, parsedUrl) {\n var options, _ref3, response, body;\n\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n\n options = {\n url: parsedUrl,\n headers: _extends({}, REQUEST_HEADERS),\n timeout: FETCH_TIMEOUT,\n // Don't set encoding; fixes issues\n // w/gzipped responses\n encoding: null,\n // Accept cookies\n jar: true,\n // Accept and decode gzip\n gzip: true,\n // Follow any redirect\n followAllRedirects: true\n };\n _context.next = 4;\n return get(options);\n\n case 4:\n _ref3 = _context.sent;\n response = _ref3.response;\n body = _ref3.body;\n _context.prev = 7;\n\n validateResponse(response);\n return _context.abrupt('return', {\n body: body,\n response: response\n });\n\n case 12:\n _context.prev = 12;\n _context.t0 = _context['catch'](7);\n return _context.abrupt('return', Errors.badUrl);\n\n case 15:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, this, [[7, 12]]);\n }));\n\n function fetchResource(_x2, _x3) {\n return _ref2.apply(this, arguments);\n }\n\n return fetchResource;\n})();\n\nfunction convertMetaProp($, from, to) {\n $('meta[' + from + ']').each(function (_, node) {\n var $node = $(node);\n\n var value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. 
See, e.g., og or twitter meta tags.\n\nfunction normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n\n// Spacer images to be removed\nvar SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nvar KEEP_CLASS = 'mercury-parser-keep';\n\nvar KEEP_SELECTORS = ['iframe[src^=\"https://www.youtube.com\"]', 'iframe[src^=\"http://www.youtube.com\"]', 'iframe[src^=\"https://player.vimeo\"]', 'iframe[src^=\"http://player.vimeo\"]'];\n\n// A list of tags to strip from the output if we encounter them.\nvar STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];\n\n// cleanAttributes\nvar REMOVE_ATTRS = ['style', 'align'];\nvar REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {\n return '[' + selector + ']';\n});\nvar REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nvar WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nvar WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');\n\n// removeEmpty\nvar REMOVE_EMPTY_TAGS = ['p'];\nvar REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {\n return tag + ':empty';\n}).join(',');\n\n// cleanTags\nvar CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nvar HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nvar HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nvar UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',\n// 'form', // This is too generic, has too many false positives\n'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.\n'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box\n'presence_control_external', // lifehacker.com container full of false positives\n'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nvar UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form\n'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nvar DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\n\n\n\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\n\n\n\n\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday\n'\\\\Bcopy'];\n\n// The above list, joined into a matching regular expression\nvar POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\n\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off\n'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright\n'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk\n'promo', 'pr_', // autoblog - press release\n'related', 'respond', 'roundcontent', // lifehacker restricted content warning\n'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];\n// The above list, joined into a matching regular expression\nvar NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. Not always successful.\nvar IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\n\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\n\n\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nvar PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\n\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\n\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\n\n\n// Match 2 or more consecutive tags\n\n\n// Match 1 BR tag.\n\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nvar BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];\nvar BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nvar candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nvar CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nvar candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nvar CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nfunction stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each(function (index, node) {\n var $node = $(node);\n var classes = $node.attr('class');\n var id = $node.attr('id');\n if (!id && !classes) return;\n\n var classAndId = (classes || '') + ' ' + (id || '');\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. 
- AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nfunction brsToPs$$1($) {\n var collapsing = false;\n $('br').each(function (index, element) {\n var $element = $(element);\n var nextElement = $element.next().get(0);\n\n if (nextElement && nextElement.tagName.toLowerCase() === 'br') {\n collapsing = true;\n $element.remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nfunction paragraphize(node, $) {\n var br = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : false;\n\n var $node = $(node);\n\n if (br) {\n var sibling = node.nextSibling;\n var p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n var nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n\nfunction convertDivs($) {\n $('div').each(function (index, div) {\n var $div = $(div);\n var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo$$1($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each(function (index, span) {\n var $span = $(span);\n var convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo$$1($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. Returned just for convenience.)\n\nfunction convertToParagraphs$$1($) {\n $ = brsToPs$$1($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n\nfunction convertNodeTo$$1($node, $) {\n var tag = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : 'p';\n\n var node = $node.get(0);\n if (!node) {\n return $;\n }\n var attrs = getAttrs(node) || {};\n\n var attribString = _Reflect$ownKeys(attrs).map(function (key) {\n return key + '=' + attrs[key];\n }).join(' ');\n var html = void 0;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html = node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();\n } else {\n html = $node.contents();\n }\n $node.replaceWith('<' + tag + ' ' + attribString + '>' + html + '' + tag + '>');\n return $;\n}\n\nfunction cleanForHeight($img, $) {\n var height = parseInt($img.attr('height'), 10);\n var width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nfunction cleanImages($article, $) {\n $article.find('img').each(function (index, img) {\n var $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n\nfunction markToKeep(article, $, url) {\n var tags = arguments.length > 3 && arguments[3] !== undefined ? 
arguments[3] : [];\n\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n var _URL$parse = URL.parse(url),\n protocol = _URL$parse.protocol,\n hostname = _URL$parse.hostname;\n\n tags = [].concat(_toConsumableArray(tags), ['iframe[src^=\"' + protocol + '//' + hostname + '\"]']);\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return $;\n}\n\nfunction stripJunkTags(article, $) {\n var tags = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];\n\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not('.' + KEEP_CLASS).remove();\n\n // Remove the mercury-parser-keep class from result\n $('.' + KEEP_CLASS, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there's less than 3 of them (<3),\n// strip them. 
Otherwise, turn 'em into H2s.\n\nfunction cleanHOnes$$1(article, $) {\n var $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each(function (index, node) {\n return $(node).remove();\n });\n } else {\n $hOnes.each(function (index, node) {\n convertNodeTo$$1($(node), $, 'h2');\n });\n }\n\n return $;\n}\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each(function (index, node) {\n var attrs = getAttrs(node);\n\n setAttrs(node, _Reflect$ownKeys(attrs).reduce(function (acc, attr) {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return _extends({}, acc, _defineProperty({}, attr, attrs[attr]));\n }\n\n return acc;\n }, {}));\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nfunction cleanAttributes$$1($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist($article.parent().length ? $article.parent() : $article);\n}\n\nfunction removeEmpty($article, $) {\n $article.find('p').each(function (index, p) {\n var $p = $(p);\n if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nvar UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.\n'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box\n'presence_control_external', // lifehacker.com container full of false positives\n'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nvar UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form\n'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nvar DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nvar NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];\n\nvar NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nvar HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];\n\nvar PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];\nvar PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday\n'\\\\Bcopy'];\n\n// The above list, joined into a matching regular expression\nvar POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nvar READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nvar NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off\n'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright\n'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk\n'promo', 'pr_', // autoblog - press release\n'related', 'respond', 'roundcontent', // lifehacker restricted content warning\n'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];\n// The above list, joined into a matching regular expression\nvar NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');\n\n// Match a digit. Pretty clear.\n\n\n// Match 2 or more consecutive tags\n\n\n// Match 1 BR tag.\n\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\n\n\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nvar candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');\n\n\nvar candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');\n\n\n\n\nvar PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');\nvar CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nvar BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');\n\n// Get the score of a node based on its className and id.\nfunction getWeight(node) {\n var classes = node.attr('class');\n var id = node.attr('id');\n var score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE$1.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE$1.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE$1.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE$1.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE$1.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET$1.test(classes)) {\n score += 25;\n }\n }\n\n return score;\n}\n\n// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nfunction getScore($node) {\n // console.log(\"NODE\", $node, $node.attr('score'))\n return parseFloat($node.attr('score')) || null;\n}\n\n// return 1 for every comma in text\nfunction scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n\nvar idkRe = new RegExp('^(p|pre)$', 
'i');\n\nfunction scoreLength(textLength) {\n var tagName = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'p';\n\n var chunks = textLength / 50;\n\n if (chunks > 0) {\n var lengthBonus = void 0;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nfunction scoreParagraph$$1(node) {\n var score = 1;\n var text = node.text().trim();\n var textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n\nfunction setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n\nfunction addScore$$1($node, $, amount) {\n try {\n var score = getOrInitScore$$1($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n\n// Adds 1/4 of a child's score to its parent\nfunction addToParent$$1(node, $, score) {\n var parent = node.parent();\n if (parent) {\n addScore$$1(parent, $, score * 0.25);\n }\n\n return node;\n}\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nfunction getOrInitScore$$1($node, $) {\n var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;\n\n var score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode$$1($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent$$1($node, $, score);\n\n return score;\n}\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nfunction scoreNode$$1($node) {\n var _$node$get = $node.get(0),\n tagName = _$node$get.tagName;\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n\n\n if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {\n return scoreParagraph$$1($node);\n } else if (tagName.toLowerCase() === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {\n return 3;\n } else if (BAD_TAGS$1.test(tagName)) {\n return -3;\n } else if (tagName.toLowerCase() === 'th') {\n return -5;\n }\n\n return 0;\n}\n\nfunction convertSpans$1($node, $) {\n if ($node.get(0)) {\n var _$node$get = $node.get(0),\n tagName = _$node$get.tagName;\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo$$1($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans$1($node, $);\n addScore$$1($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each(function (index, node) {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n var $node = $(node);\n $node = setScore($node, $, getOrInitScore$$1($node, $, weightNodes));\n\n var $parent = $node.parent();\n var rawScore = scoreNode$$1($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. Parents get the full value of their children's\n// content score, grandparents half\nfunction scoreContent$$1($) {\n var weightNodes = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : true;\n\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {\n var _ref2 = _slicedToArray(_ref, 2),\n parentSelector = _ref2[0],\n childSelector = _ref2[1];\n\n $(parentSelector + ' ' + childSelector).each(function (index, node) {\n addScore$$1($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n\nvar NORMALIZE_RE = /\\s{2,}/g;\n\nfunction normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n\n// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nfunction extractFromUrl(url, regexList) {\n var matchRe = regexList.find(function (re) {\n return re.test(url);\n });\n // const matchRe = null\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n\n// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nvar PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nvar HAS_ALPHA_RE = /[a-z]/i;\n\nvar IS_ALPHA_RE = /^[a-z]+$/i;\nvar IS_DIGIT_RE = /^[0-9]+$/i;\n\nfunction pageNumFromUrl(url) {\n var matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n var pageNum = parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? 
pageNum : null;\n}\n\nfunction removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n var goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. Useful for comparing to other links\n// that might have pagination data within them.\nfunction articleBaseUrl(url, parsed) {\n var parsedUrl = parsed || URL.parse(url);\n var protocol = parsedUrl.protocol,\n host = parsedUrl.host,\n path = parsedUrl.path;\n\n\n var firstSegmentHasLetters = false;\n var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {\n var segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n var _segment$split = segment.split('.'),\n _segment$split2 = _slicedToArray(_segment$split, 2),\n possibleSegment = _segment$split2[0],\n fileExt = _segment$split2[1];\n\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. 
The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return protocol + '//' + host + cleanedSegments.reverse().join('/');\n}\n\n// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nvar SENTENCE_END_RE = new RegExp('.( |$)');\nfunction hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n\nfunction excerptContent(content) {\n var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;\n\n return content.trim().split(/\\s+/).slice(0, words).join(' ');\n}\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) 
Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nfunction mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n var siblingScoreThreshold = Math.max(10, topScore * 0.25);\n var wrappingDiv = $('');\n\n $candidate.parent().children().each(function (index, sibling) {\n var $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {\n return null;\n }\n\n var siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling.get(0) === $candidate.get(0)) {\n wrappingDiv.append($sibling);\n } else {\n var contentBonus = 0;\n var density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n var newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n var siblingContent = $sibling.text();\n var siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n if (wrappingDiv.children().length === 1 && wrappingDiv.children().first().get(0) === $candidate.get(0)) {\n return $candidate;\n }\n\n return wrappingDiv;\n}\n\n// After we've calculated scores, loop through all of the possible\n// candidate nodes we found and find the one with the highest score.\nfunction findTopCandidate$$1($) {\n var 
$candidate = void 0;\n var topScore = 0;\n\n $('[score]').each(function (index, node) {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {\n return;\n }\n\n var $node = $(node);\n var score = getScore($node);\n\n if (score > topScore) {\n topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n\n// Scoring\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n var content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n var pCount = $('p', $node).length;\n var inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > pCount / 3) {\n $node.remove();\n return;\n }\n\n var contentLength = content.length;\n var imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n var density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. 
That\n // means it's probably content.\n var tagName = $node.get(0).tagName.toLowerCase();\n var nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n var previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n var scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. (like div, form,\n// etc)\n//\n// Return this same doc.\nfunction cleanTags$$1($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {\n var $node = $(node);\n var weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore$$1($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n\nfunction cleanHeaders($article, $) {\n var title = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : '';\n\n $(HEADER_TAG_LIST, $article).each(function (index, header) {\n var $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. 
This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\n\nfunction rewriteTopLevel$$1(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo$$1($('html'), $, 'div');\n $ = convertNodeTo$$1($('body'), $, 'div');\n\n return $;\n}\n\n/* eslint-disable */\nfunction absolutize($, rootUrl, attr, $content) {\n $('[' + attr + ']', $content).each(function (_, node) {\n var attrs = getAttrs(node);\n var url = attrs[attr];\n\n if (url) {\n var absoluteUrl = URL.resolve(rootUrl, url);\n setAttr(node, attr, absoluteUrl);\n }\n });\n}\n\nfunction makeLinksAbsolute$$1($content, $, url) {\n ['href', 'src'].forEach(function (attr) {\n return absolutize($, url, attr, $content);\n });\n\n return $content;\n}\n\nfunction textLength(text) {\n return text.trim().replace(/\\s+/g, ' ').length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nfunction linkDensity($node) {\n var totalTextLength = textLength($node.text());\n\n var linkText = $node.find('a').text();\n var linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n\n// 
Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\n\nfunction extractFromMeta$$1($, metaNames, cachedNames) {\n var cleanTags$$1 = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;\n\n var foundNames = metaNames.filter(function (name) {\n return cachedNames.indexOf(name) !== -1;\n });\n\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n var _loop = function _loop() {\n var name = _step.value;\n\n var type = 'name';\n var value = 'value';\n\n var nodes = $('meta[' + type + '=\"' + name + '\"]');\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n var values = nodes.map(function (index, node) {\n return $(node).attr(value);\n }).toArray().filter(function (text) {\n return text !== '';\n });\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. Skip this name\n // also.\n if (values.length === 1) {\n var metaValue = void 0;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags$$1) {\n metaValue = stripTags(values[0], $);\n } else {\n metaValue = values[0];\n }\n\n return {\n v: metaValue\n };\n }\n };\n\n for (var _iterator = _getIterator(foundNames), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var _ret = _loop();\n\n if ((typeof _ret === 'undefined' ? 
'undefined' : _typeof(_ret)) === \"object\") return _ret.v;\n }\n\n // If nothing is found, return null\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return null;\n}\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment$$1($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nfunction extractFromSelectors$$1($, selectors) {\n var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;\n var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(selectors), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var selector = _step.value;\n\n var nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. 
Skip it.\n if (nodes.length === 1) {\n var $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n var content = void 0;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return null;\n}\n\n// strips all tags from a string of text\nfunction stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n var cleanText = $('' + text + '').text();\n return cleanText === '' ? text : cleanText;\n}\n\nfunction withinComment$$1($node) {\n var parents = $node.parents().toArray();\n var commentParent = parents.find(function (parent) {\n var attrs = getAttrs(parent);\n var nodeClass = attrs.class,\n id = attrs.id;\n\n var classAndId = nodeClass + ' ' + id;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n\n// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nfunction nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n\nfunction isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n\nfunction getAttrs(node) {\n var attribs = node.attribs,\n attributes = node.attributes;\n\n\n if (!attribs && attributes) {\n var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {\n var attr = attributes[index];\n\n acc[attr.name] = attr.value;\n return acc;\n }, {});\n return attrs;\n }\n\n return attribs;\n}\n\nfunction setAttr(node, attr, val) {\n if (node.attribs) {\n node.attribs[attr] = val;\n } else if (node.attributes) {\n node.setAttribute(attr, val);\n }\n\n return node;\n}\n\n/* eslint-disable */\nfunction setAttrs(node, attrs) {\n if (node.attribs) 
{\n node.attribs = attrs;\n } else if (node.attributes) {\n while (node.attributes.length > 0) {\n node.removeAttribute(node.attributes[0].name);\n }_Reflect$ownKeys(attrs).forEach(function (key) {\n node.setAttribute(key, attrs[key]);\n });\n }\n\n return node;\n}\n\n// DOM manipulation\n\nvar IS_LINK = new RegExp('https?://', 'i');\nvar IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nvar TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');\n\n// Convert all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nfunction convertLazyLoadedImages($) {\n $('img').each(function (_, img) {\n var attrs = getAttrs(img);\n\n _Reflect$ownKeys(attrs).forEach(function (attr) {\n var value = attrs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) && IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $('*').first().contents().filter(isComment).remove();\n\n return $;\n}\n\nfunction clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n\nvar Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. 
Expects a\n // string.\n create: function create(url, preparedResponse, parsedUrl) {\n var _this = this;\n\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {\n var result, validResponse;\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n result = void 0;\n\n if (!preparedResponse) {\n _context.next = 6;\n break;\n }\n\n validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500\n }\n };\n\n\n result = { body: preparedResponse, response: validResponse };\n _context.next = 9;\n break;\n\n case 6:\n _context.next = 8;\n return fetchResource$1(url, parsedUrl);\n\n case 8:\n result = _context.sent;\n\n case 9:\n if (!result.error) {\n _context.next = 11;\n break;\n }\n\n return _context.abrupt('return', result);\n\n case 11:\n return _context.abrupt('return', _this.generateDoc(result));\n\n case 12:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, _this);\n }))();\n },\n generateDoc: function generateDoc(_ref) {\n var content = _ref.body,\n response = _ref.response;\n var contentType = response.headers['content-type'];\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n\n if (!contentType.includes('html') && !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n var $ = cheerio.load(content, { normalizeWhitespace: true });\n\n if ($('*').first().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n }\n};\n\nvar merge = function merge(extractor, domains) {\n return domains.reduce(function (acc, domain) {\n acc[domain] = extractor;\n return acc;\n }, {});\n};\n\nfunction 
mergeSupportedDomains(extractor) {\n return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);\n}\n\nvar BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: ['.post-content noscript'],\n\n // Selectors to remove from the extracted content\n clean: [],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div'\n }\n },\n\n author: {\n selectors: ['.post-author-name']\n },\n\n title: {\n selectors: ['.post h2.title']\n },\n\n date_published: {\n selectors: ['span.publishdate']\n }\n};\n\nvar NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: ['div.article-content', 'section.body', 'article.article'],\n\n // Selectors to remove from the extracted content\n clean: ['.ad', '.single-related-story'],\n\n // Object of tranformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: function noscript($node, $) {\n if ($.browser) {\n var $children = $($node.text());\n\n if ($children.length === 1 && $children.get(0) !== undefined && $children.get(0).tagName.toLowerCase() === 'img') {\n return 'figure';\n }\n } else {\n var _$children = $node.children();\n if (_$children.length === 1 && _$children.get(0).tagName === 'img') {\n return 'figure';\n }\n }\n\n return null;\n }\n }\n },\n\n title: {\n selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']\n },\n\n author: {\n selectors: ['.by-authors', 
'.lede-feature-author']\n },\n\n dek: {\n selectors: ['.lede-feature-teaser']\n },\n\n date_published: {\n selectors: [['time.article-timestamp[datetime]', 'datetime'], 'time.article-timestamp']\n }\n};\n\nvar WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: ['#mw-content-text'],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': function infoboxImg($node) {\n var $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure'\n },\n\n // Selectors to remove from the extracted content\n clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc', '.navbox']\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: ['h2.title']\n },\n\n date_published: {\n selectors: ['#footer-info-lastmod']\n }\n\n};\n\nvar TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': function permalinkRoleMain($node, $) {\n var tweets = $node.find('.tweet');\n var $tweetContainer = $('');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with s, which\n // renders as a strikethrough\n s: 'span'\n },\n\n selectors: ['.permalink[role=main]'],\n\n defaultCleaner: false,\n\n clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']\n },\n\n author: {\n selectors: ['.tweet.permalink-tweet .username']\n },\n\n date_published: {\n selectors: [['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms']]\n }\n\n};\n\nvar NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: 
['.g-headline', 'h1.headline']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value'], '.g-byline', '.byline']\n },\n\n content: {\n selectors: ['div.g-blocks', 'article#story'],\n\n defaultCleaner: false,\n\n transforms: {\n 'img.g-lazy': function imgGLazy($node) {\n var src = $node.attr('src');\n // const widths = $node.attr('data-widths')\n // .slice(1)\n // .slice(0, -1)\n // .split(',');\n // if (widths.length) {\n // width = widths.slice(-1);\n // } else {\n // width = '900';\n // }\n var width = 640;\n\n src = src.replace('{{size}}', width);\n $node.attr('src', src);\n }\n },\n\n clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden']\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\nvar TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: ['h1.hed']\n },\n\n author: {\n selectors: ['article#article .article-cover-extra .metadata .byline a']\n },\n\n content: {\n selectors: ['.article-body'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['time[itemProp=\"datePublished\"]', 'datetime']]\n },\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: ['h1.title']\n },\n\n author: {\n selectors: ['.contributors']\n },\n\n content: {\n selectors: ['div#articleBody', 'div.articleBody'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: ['h1.post-title']\n },\n\n author: {\n selectors: ['a[rel=\"author\"]']\n },\n\n content: {\n selectors: ['article.content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.visually-hidden']\n },\n\n date_published: {\n selectors: [['meta[itemprop=\"datePublished\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: ['h1']\n },\n\n author: {\n selectors: ['span.authorname-txt']\n },\n\n content: {\n selectors: ['div.richtext'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['span.caption']\n },\n\n date_published: {\n selectors: ['span.time']\n },\n\n lead_image_url: {\n selectors: []\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: ['header.canvas-header']\n },\n\n author: {\n selectors: ['span.provider-name']\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.figure-caption']\n },\n\n date_published: {\n selectors: [['time.date[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: ['h1[id=\"post-title\"]']\n },\n\n author: {\n selectors: ['a[data-action=\"user/username\"]', 'byline__author']\n },\n\n content: {\n selectors: ['#buzz_sub_buzz'],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n h2: 'b'\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box']\n },\n\n date_published: {\n selectors: ['.buzz-datetime']\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: ['h1.entry-title']\n },\n\n author: {\n selectors: ['.author vcard', '.fn']\n },\n\n content: {\n selectors: ['.grid-content', '.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[name=\"article:published_time\"]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: ['h1.post-title']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']]\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro', '.content-wrapper'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value']]\n },\n\n author: {\n selectors: ['.story-main-content .byline .vcard']\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content', '.content-group', '.story-core', '.story-text'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: ['figcaption']\n },\n\n date_published: {\n selectors: [['.story-main-content .timestamp time[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"description\"]', 'value']]\n },\n\n next_page_url: null,\n\n excerpt: null\n};\n\nvar DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: ['jezebel.com', 'lifehacker.com', 'kotaku.com', 'gizmodo.com', 'jalopnik.com', 'kinja.com'],\n\n title: {\n selectors: ['h1.headline']\n },\n\n author: {\n selectors: ['.author']\n },\n\n content: {\n selectors: ['.post-content', '.entry-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': function iframeLazyloadDataRecommendIdYoutube($node) {\n var youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['time.updated[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [\n // enter selectors\n ]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: ['h1.article-title']\n },\n\n author: {\n selectors: ['span[itemprop=author]']\n },\n\n content: {\n selectors: ['div[itemprop=articlebody]'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {},\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['meta[itemprop=datePublished]', 'value']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=\"og:description\"]', 'value']]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nvar ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: ['h1.headline']\n },\n\n author: {\n selectors: ['.PostByline__name']\n },\n\n content: {\n selectors: ['div.post__content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': function divDataRenderReactIdImagesLazyPicture($node, $) {\n var data = JSON.parse($node.attr('data-props'));\n var src = data.sources[0].src;\n\n var $img = $('').attr('src', src);\n $node.replaceWith($img);\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['.PostByline__timestamp[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [['meta[name=description]', 'value']]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\nvar MediumExtractor = {\n domain: 'medium.com',\n\n supportedDomains: ['trackchanges.postlight.com'],\n\n title: {\n selectors: ['h1']\n },\n\n author: {\n selectors: [['meta[name=\"author\"]', 'value']]\n },\n\n content: {\n selectors: ['.section-content'],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: function iframe($node) {\n var ytRe = /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n var thumb = decodeURIComponent($node.attr('data-thumbnail'));\n\n if (ytRe.test(thumb)) {\n var _thumb$match = thumb.match(ytRe),\n _thumb$match2 = _slicedToArray(_thumb$match, 2),\n _ = _thumb$match2[0],\n youtubeId = _thumb$match2[1]; // eslint-disable-line\n\n\n $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);\n var $parent = $node.parents('figure');\n $parent.prepend($node.clone());\n $node.remove();\n }\n }\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: []\n },\n\n date_published: {\n selectors: [['time[datetime]', 'datetime']]\n },\n\n lead_image_url: {\n selectors: [['meta[name=\"og:image\"]', 'value']]\n },\n\n dek: {\n selectors: [\n // enter selectors\n ]\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ]\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ]\n }\n};\n\n\n\nvar CustomExtractors = Object.freeze({\n\tBloggerExtractor: BloggerExtractor,\n\tNYMagExtractor: NYMagExtractor,\n\tWikipediaExtractor: WikipediaExtractor,\n\tTwitterExtractor: TwitterExtractor,\n\tNYTimesExtractor: NYTimesExtractor,\n\tTheAtlanticExtractor: TheAtlanticExtractor,\n\tNewYorkerExtractor: NewYorkerExtractor,\n\tWiredExtractor: WiredExtractor,\n\tMSNExtractor: MSNExtractor,\n\tYahooExtractor: YahooExtractor,\n\tBuzzfeedExtractor: BuzzfeedExtractor,\n\tWikiaExtractor: WikiaExtractor,\n\tLittleThingsExtractor: LittleThingsExtractor,\n\tPoliticoExtractor: PoliticoExtractor,\n\tDeadspinExtractor: DeadspinExtractor,\n\tBroadwayWorldExtractor: BroadwayWorldExtractor,\n\tApartmentTherapyExtractor: ApartmentTherapyExtractor,\n\tMediumExtractor: MediumExtractor\n});\n\nvar Extractors = 
_Object$keys(CustomExtractors).reduce(function (acc, key) {\n var extractor = CustomExtractors[key];\n return _extends({}, acc, mergeSupportedDomains(extractor));\n}, {});\n\n// CLEAN AUTHOR CONSTANTS\nvar CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n// author = re.sub(r'^\\s*(posted |written )?by\\s*:?\\s*(.*)(?i)',\n\n// CLEAN DEK CONSTANTS\nvar TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistenty enough. Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\n\n\n// An ordered list of Selectors to find likely article deks. From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\n\n\n// CLEAN DATE PUBLISHED CONSTANTS\nvar MS_DATE_STRING = /^\\d{13}$/i;\nvar SEC_DATE_STRING = /^\\d{10}$/i;\nvar CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nvar TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nvar TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nvar months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];\nvar allMonths = months.join('|');\nvar timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nvar timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nvar SPLIT_DATE_STRING = new RegExp('(' + timestamp1 + ')|(' + timestamp2 + ')|([0-9]{1,4})|(' + allMonths + ')', 'ig');\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nvar TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nvar DOMAIN_ENDINGS_RE = new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n\n// Take an 
author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nfunction cleanAuthor(author) {\n return author.replace(CLEAN_AUTHOR_RE, '$2').trim();\n}\n\nfunction clean$1(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nfunction cleanDek(dek, _ref) {\n var $ = _ref.$,\n excerpt = _ref.excerpt;\n\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10)) return null;\n\n var dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return dekText.trim();\n}\n\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nfunction cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1').trim();\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nfunction cleanDatePublished(dateString) {\n // If string is in milliseconds or seconds, convert to int\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n dateString = parseInt(dateString, 10);\n }\n\n var date = moment(new Date(dateString));\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = moment(new Date(dateString));\n }\n\n return date.isValid() ? 
date.toISOString() : null;\n}\n\n// Clean our article content, returning a new, cleaned node.\n\nfunction extractCleanNode(article, _ref) {\n var $ = _ref.$,\n _ref$cleanConditional = _ref.cleanConditionally,\n cleanConditionally = _ref$cleanConditional === undefined ? true : _ref$cleanConditional,\n _ref$title = _ref.title,\n title = _ref$title === undefined ? '' : _ref$title,\n _ref$url = _ref.url,\n url = _ref$url === undefined ? '' : _ref$url,\n _ref$defaultCleaner = _ref.defaultCleaner,\n defaultCleaner = _ref$defaultCleaner === undefined ? true : _ref$defaultCleaner;\n\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel$$1(article, $);\n\n // Drop small images and spacer images\n // Only do this is defaultCleaner is set to true;\n // this can sometimes be too aggressive.\n if (defaultCleaner) cleanImages(article, $);\n\n // Mark elements to keep that would normally be removed.\n // E.g., stripJunkTags will remove iframes, so we're going to mark\n // YouTube/Vimeo videos as elements we want to keep.\n markToKeep(article, $, url);\n\n // Drop certain tags like , etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. Otherwise, turn 'em into H2s.\n cleanHOnes$$1(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // Make links absolute\n makeLinksAbsolute$$1(article, $, url);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags$$1(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes$$1(article, $);\n\n return article;\n}\n\nfunction cleanTitle$$1(title, _ref) {\n var url = _ref.url,\n $ = _ref.$;\n\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n var h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return stripTags(title, $).trim();\n}\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n var _ret = function () {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n var termCounts = splitTitle.reduce(function (acc, titleText) {\n acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n var _Reflect$ownKeys$redu = _Reflect$ownKeys(termCounts).reduce(function (acc, key) {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]),\n _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),\n maxTerm = _Reflect$ownKeys$redu2[0],\n termCount = _Reflect$ownKeys$redu2[1];\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. 
Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n\n\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n var splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n var longestEnd = splitEnds.reduce(function (acc, end) {\n return acc.length > end.length ? acc : end;\n }, '');\n\n if (longestEnd.length > 10) {\n return {\n v: longestEnd\n };\n }\n\n return {\n v: text\n };\n }();\n\n if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === \"object\") return _ret.v;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. Not the end of the world if it doesn't strip right.\n var _URL$parse = URL.parse(url),\n host = _URL$parse.host;\n\n var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n var startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nfunction resolveSplitTitle(title) {\n var url = arguments.length > 1 && arguments[1] !== undefined ? 
arguments[1] : '';\n\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n var splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n var newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n\nvar Cleaners = {\n author: cleanAuthor,\n lead_image_url: clean$1,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: extractCleanNode,\n title: cleanTitle$$1\n};\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nfunction extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? – AP\n // let $root = $.root().clone()\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs$$1($);\n $ = scoreContent$$1($, opts.weightNodes);\n var $topCandidate = findTopCandidate$$1($);\n\n return $topCandidate;\n}\n\nvar GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. 
If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. Things like forms, ads, etc.\n extract: function extract(_ref, opts) {\n var $ = _ref.$,\n html = _ref.html,\n title = _ref.title,\n url = _ref.url,\n cheerio$$1 = _ref.cheerio;\n\n opts = _extends({}, this.defaultOpts, opts);\n\n $ = $ || cheerio$$1.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n var node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(_Reflect$ownKeys(opts).filter(function (k) {\n return opts[k] === true;\n })), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var key = _step.value;\n\n opts[key] = false;\n $ = cheerio$$1.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return 
this.cleanAndReturnNode(node, $);\n },\n\n\n // Get node given current options\n getContentNode: function getContentNode($, title, url, opts) {\n return extractCleanNode(extractBestNode($, opts), {\n $: $,\n cleanConditionally: opts.cleanConditionally,\n title: title,\n url: url\n });\n },\n\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode: function cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n\n // if return_type == \"html\":\n // return normalize_spaces(node_to_html(node))\n // else:\n // return node\n }\n};\n\n// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\nvar STRONG_TITLE_META_TAGS = ['tweetmeme-title', 'dc.title', 'rbtitle', 'headline', 'title'];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nvar WEAK_TITLE_META_TAGS = ['og:title'];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nvar STRONG_TITLE_SELECTORS = ['.hentry .entry-title', 'h1#articleHeader', 'h1.articleHeader', 'h1.article', '.instapaper_title', '#meebo-title'];\n\nvar WEAK_TITLE_SELECTORS = ['article h1', '#entry-title', '.entry-title', '#entryTitle', '#entrytitle', '.entryTitle', '.entrytitle', '#articleTitle', '.articleTitle', 'post post-title', 'h1.title', 'h2.article', 'h1', 'html head title', 'title'];\n\nvar GenericTitleExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n metaCache = _ref.metaCache;\n\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n var title = void 0;\n\n title = extractFromMeta$$1($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle$$1(title, { url: url, $: $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors$$1($, STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle$$1(title, { url: url, $: $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta$$1($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle$$1(title, { url: url, $: $ });\n\n // Last, look for weaker selector tags that may match.\n title = extractFromSelectors$$1($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle$$1(title, { url: url, $: $ });\n\n // If no matches, return an empty string\n return '';\n }\n};\n\n// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. 
From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nvar AUTHOR_META_TAGS = ['byl', 'clmst', 'dc.author', 'dcsext.author', 'dc.creator', 'rbauthors', 'authors'];\n\nvar AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of XPath Selectors to find likely article authors. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. The\n// speed tradeoff is worth the accuracy hit.\nvar AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .vcard .fn', '.byline.vcard .fn', '.byline .vcard .fn', '.byline .by .author', '.byline .by', '.byline .author', '.post-author.vcard', '.post-author .vcard', 'a[rel=author]', '#by_author', '.by_author', '#entryAuthor', '.entryAuthor', '.byline a[href*=author]', '#author .authorname', '.author .authorname', '#author', '.author', '.articleauthor', '.ArticleAuthor', '.byline'];\n\n// An ordered list of Selectors to find likely article authors, with\n// regular expression for content.\nvar bylineRe = /^[\\n\\s]*By/i;\nvar BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];\n\nvar GenericAuthorExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n metaCache = _ref.metaCache;\n\n var author = void 0;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta$$1($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors looking for potential authors.\n author = extractFromSelectors$$1($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression 
based selectors for\n // potential authors.\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(BYLINE_SELECTORS_RE), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var _ref4 = _step.value;\n\n var _ref3 = _slicedToArray(_ref4, 2);\n\n var selector = _ref3[0];\n var regex = _ref3[1];\n\n var node = $(selector);\n if (node.length === 1) {\n var text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return null;\n }\n};\n\n// An ordered list of meta tag names that denote\n// likely date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nvar DATE_PUBLISHED_META_TAGS = ['article:published_time', 'displaydate', 'dc.date', 'dc.date.issued', 'rbpubdate', 'publish_date', 'pub_date', 'pagedate', 'pubdate', 'revision_date', 'doc_date', 'date_created', 'content_create_date', 'lastmodified', 'created', 'date'];\n\n// An ordered list of XPath Selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nvar DATE_PUBLISHED_SELECTORS = ['.hentry .dtstamp.published', '.hentry .published', '.hentry .dtstamp.updated', '.hentry .updated', '.single .published', '.meta .published', '.meta .postDate', '.entry-date', '.byline .date', '.postmetadata .date', '.article_datetime', '.date-header', '.story-date', '.dateStamp', '#story .datetime', '.dateline', '.pubdate'];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. 
These should always have the first\n// reference be a date string that is parseable by dateutil.parser.parse\nvar abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nvar DATE_PUBLISHED_URL_RES = [\n// /2012/01/27/ but not /2012/01/293\nnew RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n// 20120127 or 20120127T but not 2012012733 or 8201201733\n// /[^0-9](20\\d{2}[01]\\d[0-3]\\d)([^0-9]|$)/i,\n// 2012-01-27\nnew RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n// /2012/jan/27/\nnew RegExp('/(20\\\\d{2}/' + abbrevMonthsStr + '/[0-3]\\\\d)/', 'i')];\n\nvar GenericDatePublishedExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n metaCache = _ref.metaCache;\n\n var datePublished = void 0;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta$$1($, DATE_PUBLISHED_META_TAGS, metaCache, false);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors looking for potential\n // date_published's.\n datePublished = extractFromSelectors$$1($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a dately string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n }\n};\n\n// import {\n// DEK_META_TAGS,\n// DEK_SELECTORS,\n// DEK_URL_RES,\n// } from './constants';\n\n// import { cleanDek } from 'cleaners';\n\n// import {\n// extractFromMeta,\n// extractFromSelectors,\n// } from 'utils/dom';\n\n// Currently there is only one selector for\n// deks. 
We should simply return null here\n// until we have a more robust generic option.\n// Below is the original source for this, for reference.\nvar GenericDekExtractor = {\n // extract({ $, content, metaCache }) {\n extract: function extract() {\n return null;\n }\n};\n\n\n\n// def extract_dek(self):\n// # First, check to see if we have a matching meta tag that we can make\n// # use of.\n// dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)\n// if not dek:\n// # Second, look through our CSS/XPath selectors. This may return\n// # an HTML fragment.\n// dek = self.extract_from_selectors('dek',\n// constants.DEK_SELECTORS,\n// text_only=False)\n//\n// if dek:\n// # Make sure our dek isn't in the first few thousand characters\n// # of the content, otherwise it's just the start of the article\n// # and not a true dek.\n// content = self.extract_content()\n// content_chunk = normalize_spaces(strip_tags(content[:2000]))\n// dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.\n//\n// # 80% or greater similarity means the dek was very similar to some\n// # of the starting content, so we skip it.\n// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:\n// return dek\n//\n// return None\n\n// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nvar LEAD_IMAGE_URL_META_TAGS = ['og:image', 'twitter:image', 'image_src'];\n\nvar LEAD_IMAGE_URL_SELECTORS = ['link[rel=image_src]'];\n\nvar POSITIVE_LEAD_IMAGE_URL_HINTS = ['upload', 'wp-content', 'large', 'photo', 'wp-image'];\nvar POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nvar NEGATIVE_LEAD_IMAGE_URL_HINTS = ['spacer', 'sprite', 'blank', 'throbber', 'gradient', 'tile', 'bg', 'background', 'icon', 'social', 'header', 'hdr', 'advert', 'spinner', 'loader', 'loading', 'default', 'rating', 'share', 'facebook', 'twitter', 
'theme', 'promo', 'ads', 'wp-includes'];\nvar NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nvar GIF_RE = /\\.gif(\\?.*)?$/i;\nvar JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n\nfunction getSig($node) {\n return ($node.attr('class') || '') + ' ' + ($node.attr('id') || '');\n}\n\n// Scores image urls based on a variety of heuristics.\nfunction scoreImageUrl(url) {\n url = url.trim();\n var score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nfunction scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nfunction scoreByParents($img) {\n var score = 0;\n var $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n var $parent = $img.parent();\n var $gParent = void 0;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach(function ($node) {\n if (PHOTO_HINTS_RE$1.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// caption. 
Bonus if so.\nfunction scoreBySibling($img) {\n var score = 0;\n var $sibling = $img.next();\n var sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE$1.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nfunction scoreByDimensions($img) {\n var score = 0;\n\n var width = parseFloat($img.attr('width'));\n var height = parseFloat($img.attr('height'));\n var src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n var area = width * height;\n if (area < 5000) {\n // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nfunction scoreByPosition($imgs, index) {\n return $imgs.length / 2 - index;\n}\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nvar GenericLeadImageUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n content = _ref.content,\n metaCache = _ref.metaCache,\n html = _ref.html;\n\n var cleanUrl = void 0;\n if (!$.browser && $('head').length === 0) {\n $('*').first().prepend(html);\n }\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n var imageUrl = extractFromMeta$$1($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);\n\n if (imageUrl) {\n cleanUrl = clean$1(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n var $content = $(content);\n var imgs = $('img', $content).toArray();\n var imgScores = {};\n\n imgs.forEach(function (img, index) {\n var $img = $(img);\n var src = $img.attr('src');\n\n if (!src) return;\n\n var score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n var _Reflect$ownKeys$redu = _Reflect$ownKeys(imgScores).reduce(function (acc, key) {\n return imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc;\n }, [null, 0]),\n _Reflect$ownKeys$redu2 = _slicedToArray(_Reflect$ownKeys$redu, 2),\n topUrl = _Reflect$ownKeys$redu2[0],\n topScore = _Reflect$ownKeys$redu2[1];\n\n if (topScore > 0) {\n cleanUrl = clean$1(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n var _iteratorNormalCompletion = true;\n var _didIteratorError = false;\n var _iteratorError = undefined;\n\n try {\n for (var _iterator = _getIterator(LEAD_IMAGE_URL_SELECTORS), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {\n var selector = _step.value;\n\n var $node = $(selector).first();\n var src = $node.attr('src');\n if (src) {\n cleanUrl = clean$1(src);\n if (cleanUrl) return cleanUrl;\n }\n\n var href = $node.attr('href');\n if (href) {\n cleanUrl = clean$1(href);\n if (cleanUrl) return cleanUrl;\n }\n\n var value = $node.attr('value');\n if (value) {\n cleanUrl = clean$1(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n } catch (err) {\n _didIteratorError = true;\n _iteratorError = err;\n } finally {\n try {\n if (!_iteratorNormalCompletion && _iterator.return) {\n _iterator.return();\n }\n } finally {\n if (_didIteratorError) {\n throw _iteratorError;\n }\n }\n }\n\n return null;\n }\n};\n\n\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = 
urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. 
Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n\nfunction scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. 
Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n var diffPercent = 1.0 - similarity;\n var diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n\nfunction scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n var score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n var linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n\nfunction scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n\nvar DIGIT_RE$2 = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nvar EXTRANEOUS_LINK_HINTS$1 = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];\nvar EXTRANEOUS_LINK_HINTS_RE$1 = new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. 
Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nvar NEXT_LINK_TEXT_RE$1 = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nvar CAP_LINK_TEXT_RE$1 = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nvar PREV_LINK_TEXT_RE$1 = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\n\nfunction scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE$1.test(href)) {\n return -25;\n }\n\n return 0;\n}\n\nfunction makeSig$1($link) {\n return ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');\n}\n\nfunction scoreByParents$1($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n var $parent = $link.parent();\n var positiveMatch = false;\n var negativeMatch = false;\n var score = 0;\n\n _Array$from(range(0, 4)).forEach(function () {\n if ($parent.length === 0) {\n return;\n }\n\n var parentData = makeSig$1($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. 
Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData) && EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n\nfunction scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE$1.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n\nfunction shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls) {\n // skip if we've already fetched this url\n if (previousUrls.find(function (url) {\n return href === url;\n }) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n var hostname = parsedUrl.hostname;\n\n var _URL$parse = URL.parse(href),\n linkHost = _URL$parse.hostname;\n\n // Domain mismatch.\n\n\n if (linkHost !== hostname) {\n return false;\n }\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n var fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE$2.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n\nfunction scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. 
It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n\nfunction scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE$1.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n\nfunction scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE$1.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE$1.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n\nfunction makeBaseRegex(baseUrl) {\n return new RegExp('^' + baseUrl, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return (linkText || $link.text()) + ' ' + ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');\n}\n\nfunction scoreLinks(_ref) {\n var links = _ref.links,\n articleUrl = _ref.articleUrl,\n baseUrl = _ref.baseUrl,\n parsedUrl = _ref.parsedUrl,\n $ = _ref.$,\n _ref$previousUrls = _ref.previousUrls,\n previousUrls = _ref$previousUrls === undefined ? [] : _ref$previousUrls;\n\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n var baseRegex = makeBaseRegex(baseUrl);\n var isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. 
Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n var scoredPages = links.reduce(function (possiblePages, link) {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n var attrs = getAttrs(link);\n var href = removeAnchor(attrs.href);\n var $link = $(link);\n var linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText: linkText,\n href: href\n };\n } else {\n possiblePages[href].linkText = possiblePages[href].linkText + '|' + linkText;\n }\n\n var possiblePage = possiblePages[href];\n var linkData = makeSig($link, linkText);\n var pageNum = pageNumFromUrl(href);\n\n var score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents$1($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return _Reflect$ownKeys(scoredPages).length === 0 ? 
null : scoredPages;\n}\n\n/* eslint-disable */\n// Looks for and returns next page url\n// for multi-page articles\nvar GenericNextPageUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n parsedUrl = _ref.parsedUrl,\n _ref$previousUrls = _ref.previousUrls,\n previousUrls = _ref$previousUrls === undefined ? [] : _ref$previousUrls;\n\n parsedUrl = parsedUrl || URL.parse(url);\n\n var articleUrl = removeAnchor(url);\n var baseUrl = articleBaseUrl(url, parsedUrl);\n\n var links = $('a[href]').toArray();\n\n var scoredLinks = scoreLinks({\n links: links,\n articleUrl: articleUrl,\n baseUrl: baseUrl,\n parsedUrl: parsedUrl,\n $: $,\n previousUrls: previousUrls\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n var topPage = _Reflect$ownKeys(scoredLinks).reduce(function (acc, link) {\n var scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? 
scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n }\n};\n\nvar CANONICAL_META_SELECTORS = ['og:url'];\n\nfunction parseDomain(url) {\n var parsedUrl = URL.parse(url);\n var hostname = parsedUrl.hostname;\n\n return hostname;\n}\n\nfunction result(url) {\n return {\n url: url,\n domain: parseDomain(url)\n };\n}\n\nvar GenericUrlExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n url = _ref.url,\n metaCache = _ref.metaCache;\n\n var $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n var href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n var metaUrl = extractFromMeta$$1($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n }\n};\n\nvar EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];\n\nfunction clean$2(content, $) {\n var maxLength = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : 200;\n\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nvar GenericExcerptExtractor = {\n extract: function extract(_ref) {\n var $ = _ref.$,\n content = _ref.content,\n metaCache = _ref.metaCache;\n\n var excerpt = extractFromMeta$$1($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean$2(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n var maxLength = 200;\n var shortContent = content.slice(0, maxLength * 5);\n return clean$2($(shortContent).text(), $, maxLength);\n }\n};\n\nvar GenericWordCountExtractor = {\n extract: function extract(_ref) {\n var content = _ref.content;\n\n var $ = cheerio.load(content);\n var $content = $('div').first();\n\n var text = normalizeSpaces($content.text());\n return text.split(/\\s/).length;\n }\n};\n\nvar GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: function direction(_ref) {\n var title = _ref.title;\n return stringDirection.getDirection(title);\n },\n\n extract: function extract(options) {\n var html = options.html,\n cheerio$$1 = options.cheerio,\n $ = options.$;\n\n\n if (html && !$) {\n var loaded = cheerio$$1.load(html);\n options.$ = loaded;\n }\n\n var title = this.title(options);\n var date_published = this.date_published(options);\n var author = this.author(options);\n var content = this.content(_extends({}, options, { title: title }));\n 
var lead_image_url = this.lead_image_url(_extends({}, options, { content: content }));\n var dek = this.dek(_extends({}, options, { content: content }));\n var next_page_url = this.next_page_url(options);\n var excerpt = this.excerpt(_extends({}, options, { content: content }));\n var word_count = this.word_count(_extends({}, options, { content: content }));\n var direction = this.direction({ title: title });\n\n var _url_and_domain = this.url_and_domain(options),\n url = _url_and_domain.url,\n domain = _url_and_domain.domain;\n\n return {\n title: title,\n author: author,\n date_published: date_published || null,\n dek: dek,\n lead_image_url: lead_image_url,\n content: content,\n next_page_url: next_page_url,\n url: url,\n domain: domain,\n excerpt: excerpt,\n word_count: word_count,\n direction: direction\n };\n }\n};\n\nfunction getExtractor(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(url);\n var _parsedUrl = parsedUrl,\n hostname = _parsedUrl.hostname;\n\n var baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;\n}\n\n/* eslint-disable */\n// Remove elements by an array of selectors\nfunction cleanBySelectors($content, $, _ref) {\n var clean = _ref.clean;\n\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nfunction transformElements($content, $, _ref2) {\n var transforms = _ref2.transforms;\n\n if (!transforms) return $content;\n\n _Reflect$ownKeys(transforms).forEach(function (key) {\n var $matches = $(key, $content);\n var value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each(function (index, node) {\n convertNodeTo$$1($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each(function (index, node) {\n var result = value($(node), $);\n 
// If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo$$1($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors) {\n return selectors.find(function (selector) {\n if (Array.isArray(selector)) {\n var _selector = _slicedToArray(selector, 2),\n s = _selector[0],\n attr = _selector[1];\n\n return $(s).length === 1 && $(s).attr(attr) && $(s).attr(attr).trim() !== '';\n }\n // debugger\n\n return $(selector).length === 1 && $(selector).text().trim() !== '';\n });\n}\n\nfunction select(opts) {\n var $ = opts.$,\n type = opts.type,\n extractionOpts = opts.extractionOpts,\n _opts$extractHtml = opts.extractHtml,\n extractHtml = _opts$extractHtml === undefined ? false : _opts$extractHtml;\n // Skip if there's not extraction for this type\n\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n var selectors = extractionOpts.selectors,\n _extractionOpts$defau = extractionOpts.defaultCleaner,\n defaultCleaner = _extractionOpts$defau === undefined ? 
true : _extractionOpts$defau;\n\n\n var matchingSelector = findMatchingSelector($, selectors);\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n if (extractHtml) {\n var $content = $(matchingSelector);\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n $content = Cleaners[type]($content, _extends({}, opts, { defaultCleaner: defaultCleaner }));\n\n return $.html($content);\n }\n\n var result = void 0;\n\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n var _matchingSelector = _slicedToArray(matchingSelector, 2),\n selector = _matchingSelector[0],\n attr = _matchingSelector[1];\n\n result = $(selector).attr(attr).trim();\n } else {\n result = $(matchingSelector).text().trim();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return Cleaners[type](result, opts);\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n var type = opts.type,\n extractor = opts.extractor,\n _opts$fallback = opts.fallback,\n fallback = _opts$fallback === undefined ? 
true : _opts$fallback;\n\n\n var result = select(_extends({}, opts, { extractionOpts: extractor[type] }));\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nvar RootExtractor = {\n extract: function extract() {\n var extractor = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : GenericExtractor;\n var opts = arguments[1];\n var _opts = opts,\n contentOnly = _opts.contentOnly,\n extractedTitle = _opts.extractedTitle;\n // This is the generic extractor. Run its extract method\n\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = _extends({}, opts, {\n extractor: extractor\n });\n\n if (contentOnly) {\n var _content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: extractedTitle\n }));\n return {\n content: _content\n };\n }\n var title = extractResult(_extends({}, opts, { type: 'title' }));\n var date_published = extractResult(_extends({}, opts, { type: 'date_published' }));\n var author = extractResult(_extends({}, opts, { type: 'author' }));\n var next_page_url = extractResult(_extends({}, opts, { type: 'next_page_url' }));\n var content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: title\n }));\n var lead_image_url = extractResult(_extends({}, opts, { type: 'lead_image_url', content: content }));\n var excerpt = extractResult(_extends({}, opts, { type: 'excerpt', content: content }));\n var dek = extractResult(_extends({}, opts, { type: 'dek', content: content, excerpt: excerpt }));\n var word_count = extractResult(_extends({}, opts, { type: 'word_count', content: content }));\n var direction = extractResult(_extends({}, opts, { type: 'direction', title: title }));\n\n var _ref3 = extractResult(_extends({}, opts, { type: 'url_and_domain' })) || { url: 
null, domain: null },\n url = _ref3.url,\n domain = _ref3.domain;\n\n return {\n title: title,\n content: content,\n author: author,\n date_published: date_published,\n lead_image_url: lead_image_url,\n dek: dek,\n next_page_url: next_page_url,\n url: url,\n domain: domain,\n excerpt: excerpt,\n word_count: word_count,\n direction: direction\n };\n }\n};\n\nvar collectAllPages = (function () {\n var _ref = _asyncToGenerator(_regeneratorRuntime.mark(function _callee(_ref2) {\n var next_page_url = _ref2.next_page_url,\n html = _ref2.html,\n $ = _ref2.$,\n metaCache = _ref2.metaCache,\n result = _ref2.result,\n Extractor = _ref2.Extractor,\n title = _ref2.title,\n url = _ref2.url,\n cheerio$$1 = _ref2.cheerio;\n var pages, previousUrls, extractorOpts, nextPageResult, word_count;\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n // At this point, we've fetched just the first page\n pages = 1;\n previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n\n case 2:\n if (!(next_page_url && pages < 26)) {\n _context.next = 15;\n break;\n }\n\n pages += 1;\n _context.next = 6;\n return Resource.create(next_page_url);\n\n case 6:\n $ = _context.sent;\n\n html = $.html();\n\n extractorOpts = {\n url: next_page_url,\n html: html,\n $: $,\n metaCache: metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls: previousUrls,\n cheerio: cheerio$$1\n };\n nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n\n previousUrls.push(next_page_url);\n result = _extends({}, result, {\n content: '\\n ' + result.content + '\\n \\n
' });\n return _context.abrupt('return', _extends({}, result, {\n total_pages: pages,\n pages_rendered: pages,\n word_count: word_count\n }));\n\n case 17:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, this);\n }));\n\n function collectAllPages(_x) {\n return _ref.apply(this, arguments);\n }\n\n return collectAllPages;\n})();\n\nvar Mercury = {\n parse: function parse(url, html) {\n var _this = this;\n\n var opts = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : {};\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {\n var _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, parsedUrl, Extractor, $, metaCache, result, _result, title, next_page_url;\n\n return _regeneratorRuntime.wrap(function _callee$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n _opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === undefined ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === undefined ? 
true : _opts$fallback;\n parsedUrl = URL.parse(url);\n\n if (validateUrl(parsedUrl)) {\n _context.next = 4;\n break;\n }\n\n return _context.abrupt('return', Errors.badUrl);\n\n case 4:\n Extractor = getExtractor(url, parsedUrl);\n // console.log(`Using extractor for ${Extractor.domain}`);\n\n _context.next = 7;\n return Resource.create(url, html, parsedUrl);\n\n case 7:\n $ = _context.sent;\n\n if (!$.error) {\n _context.next = 10;\n break;\n }\n\n return _context.abrupt('return', $);\n\n case 10:\n\n html = $.html();\n\n // Cached value of every meta name in our document.\n // Used when extracting title/author/date_published/dek\n metaCache = $('meta').map(function (_, node) {\n return $(node).attr('name');\n }).toArray();\n result = RootExtractor.extract(Extractor, {\n url: url,\n html: html,\n $: $,\n metaCache: metaCache,\n parsedUrl: parsedUrl,\n fallback: fallback,\n cheerio: cheerio\n });\n _result = result, title = _result.title, next_page_url = _result.next_page_url;\n\n // Fetch more pages if next_page_url found\n\n if (!(fetchAllPages && next_page_url)) {\n _context.next = 20;\n break;\n }\n\n _context.next = 17;\n return collectAllPages({\n Extractor: Extractor,\n next_page_url: next_page_url,\n html: html,\n $: $,\n metaCache: metaCache,\n result: result,\n title: title,\n url: url,\n cheerio: cheerio\n });\n\n case 17:\n result = _context.sent;\n _context.next = 21;\n break;\n\n case 20:\n result = _extends({}, result, {\n total_pages: 1,\n rendered_pages: 1\n });\n\n case 21:\n return _context.abrupt('return', result);\n\n case 22:\n case 'end':\n return _context.stop();\n }\n }\n }, _callee, _this);\n }))();\n },\n\n\n // A convenience method for getting a resource\n // to work with, e.g., for custom extractor generator\n fetchResource: function fetchResource(url) {\n var _this2 = this;\n\n return _asyncToGenerator(_regeneratorRuntime.mark(function _callee2() {\n return _regeneratorRuntime.wrap(function _callee2$(_context2) {\n while (1) {\n switch 
(_context2.prev = _context2.next) {\n case 0:\n _context2.next = 2;\n return Resource.create(url);\n\n case 2:\n return _context2.abrupt('return', _context2.sent);\n\n case 3:\n case 'end':\n return _context2.stop();\n }\n }\n }, _callee2, _this2);\n }))();\n }\n};\n\nmodule.exports = Mercury;\n//# sourceMappingURL=mercury.js.map\n","export default function insertValues(strings, ...values) {\n if (values.length) {\n return strings.reduce((result, part, idx) => {\n let value = values[idx];\n\n if (value && typeof value.toString === 'function') {\n value = value.toString();\n } else {\n value = '';\n }\n\n return result + part + value;\n }, '');\n }\n\n return strings.join('');\n}\n","import insertValues from './insert-values';\n\nconst bodyPattern = /^\\n([\\s\\S]+)\\s{2}$/gm;\nconst trailingWhitespace = /\\s+$/;\n\nexport default function template(strings, ...values) {\n const compiled = insertValues(strings, ...values);\n let [body] = compiled.match(bodyPattern) || [];\n let indentLevel = /^\\s{0,4}(.+)$/g;\n\n if (!body) {\n body = compiled;\n indentLevel = /^\\s{0,2}(.+)$/g;\n }\n\n return body.split('\\n')\n .slice(1)\n .map((line) => {\n line = line.replace(indentLevel, '$1');\n\n if (trailingWhitespace.test(line)) {\n line = line.replace(trailingWhitespace, '');\n }\n\n return line;\n })\n .join('\\n');\n}\n","import template from './index';\n\nexport default function (hostname, name) {\n return template`\n export const ${name} = {\n domain: '${hostname}',\n\n title: {\n selectors: [\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n // enter author selectors\n ],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ]\n },\n }\n `;\n}\n","import template from './index';\n\nconst IGNORE = [\n 'url',\n 'domain',\n 'content',\n 'word_count',\n 'next_page_url',\n 'excerpt',\n 'direction',\n 'total_pages',\n 'rendered_pages',\n];\n\nfunction testFor(key, value, dir, file, url) {\n if (IGNORE.find(k => k === key)) return '';\n\n return template`\n it('returns the ${key}', async () => {\n // To pass this test, fill out the ${key} selector\n // in ${dir}/index.js.\n const html =\n fs.readFileSync('${file}');\n const articleUrl =\n '${url}';\n\n const { ${key} } =\n await Mercury.parse(articleUrl, html, { fallback: false });\n\n // Update these values with the expected values from\n // the article.\n assert.equal(${key}, ${value ? `\\`${value}\\`` : \"''\"})\n });\n `;\n}\n\nexport default function (file, url, dir, result, name) {\n return template`\n import assert from 'assert';\n import fs from 'fs';\n import URL from 'url';\n import cheerio from 'cheerio';\n\n import Mercury from 'mercury';\n import getExtractor from 'extractors/get-extractor';\n import { excerptContent } from 'utils/text';\n\n describe('${name}', () => {\n it('is selected properly', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const url =\n '${url}';\n const extractor = getExtractor(url);\n assert.equal(extractor.domain, URL.parse(url).hostname)\n })\n\n ${Reflect.ownKeys(result).map(k => testFor(k, result[k], dir, file, url)).join('\\n\\n')}\n\n it('returns the content', async () => {\n // To pass this test, fill out the content selector\n // in ${dir}/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const html =\n fs.readFileSync('${file}');\n const url =\n 
'${url}';\n\n const { content } =\n await Mercury.parse(url, html, { fallback: false });\n\n const $ = cheerio.load(content || '');\n\n const first13 = excerptContent($('*').first().text(), 13)\n\n // Update these values with the expected values from\n // the article.\n assert.equal(first13, 'Add the first 13 words of the article here');\n });\n });\n `;\n}\n","/* eslint-disable import/no-extraneous-dependencies */\n/* eslint-disable no-use-before-define */\n/* eslint-disable no-console */\nimport fs from 'fs';\nimport URL from 'url';\nimport inquirer from 'inquirer';\nimport ora from 'ora';\nimport { exec } from 'child_process';\n\nimport {\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom';\nimport Mercury from '../dist/mercury';\nimport extractorTemplate from './templates/custom-extractor';\nimport extractorTestTemplate from './templates/custom-extractor-test';\n\nconst questions = [\n {\n type: 'input',\n name: 'website',\n message: 'Paste a url to an article you\\'d like to create or extend a parser for:',\n validate(value) {\n const { hostname } = URL.parse(value);\n if (hostname) return true;\n\n return false;\n },\n },\n];\nlet spinner;\n\nfunction confirm(fn, args, msg, newParser) {\n spinner = ora({ text: msg });\n spinner.start();\n const result = fn(...args);\n\n if (result && result.then) {\n result.then(r => savePage(r, args, newParser));\n } else {\n spinner.succeed();\n }\n\n return result;\n}\n\nfunction confirmCreateDir(dir, msg) {\n if (!fs.existsSync(dir)) {\n confirm(fs.mkdirSync, [dir], msg);\n }\n}\n\nfunction getDir(url) {\n const { hostname } = URL.parse(url);\n return `./src/extractors/custom/${hostname}`;\n}\n\nfunction scaffoldCustomParser(url) {\n const dir = getDir(url);\n const { hostname } = URL.parse(url);\n let newParser = false;\n\n if (!fs.existsSync(dir)) {\n newParser = true;\n confirmCreateDir(dir, `Creating ${hostname} directory`);\n confirmCreateDir(`./fixtures/${hostname}`, 'Creating fixtures directory');\n }\n\n 
confirm(Mercury.fetchResource, [url], 'Fetching fixture', newParser);\n}\n\ninquirer.prompt(questions).then((answers) => {\n scaffoldCustomParser(answers.website);\n});\n\nfunction generateScaffold(url, file, result) {\n const { hostname } = URL.parse(url);\n const extractor = extractorTemplate(hostname, extractorName(hostname));\n const extractorTest =\n extractorTestTemplate(\n file, url, getDir(url), result, extractorName(hostname)\n );\n\n fs.writeFileSync(`${getDir(url)}/index.js`, extractor);\n fs.writeFileSync(`${getDir(url)}/index.test.js`, extractorTest);\n fs.appendFileSync(\n './src/extractors/custom/index.js',\n exportString(url),\n );\n exec(`npm run lint-fix-quiet -- ${getDir(url)}/*.js`);\n}\n\nfunction savePage($, [url], newParser) {\n const { hostname } = URL.parse(url);\n\n spinner.succeed();\n\n const filename = new Date().getTime();\n const file = `./fixtures/${hostname}/${filename}.html`;\n // fix http(s) relative links:\n makeLinksAbsolute($('*').first(), $, url);\n $('[src], [href]').each((index, node) => {\n const $node = $(node);\n const link = $node.attr('src');\n if (link && link.slice(0, 2) === '//') {\n $node.attr('src', `http:${link}`);\n }\n });\n const html = stripJunkTags($('*').first(), $, ['script']).html();\n\n fs.writeFileSync(file, html);\n\n Mercury.parse(url, html).then((result) => {\n if (newParser) {\n confirm(generateScaffold, [url, file, result], 'Generating parser and tests');\n console.log(`Your custom site extractor has been set up. To get started building it, run\n yarn watch:test -- ${hostname}\n -- OR --\n npm run watch:test -- ${hostname}`);\n } else {\n console.log(`\n It looks like you already have a custom parser for this url.\n The page you linked to has been added to ${file}. 
Copy and paste\n the following code to use that page in your tests:\n const html = fs.readFileSync('${file}');`);\n }\n });\n}\n\nfunction exportString(url) {\n const { hostname } = URL.parse(url);\n return `export * from './${hostname}';`;\n}\n\nfunction extractorName(hostname) {\n const name = hostname\n .split('.')\n .map(w => `${w.charAt(0).toUpperCase()}${w.slice(1)}`)\n .join('');\n return `${name}Extractor`;\n}\n"],"names":["SPACER_RE","RegExp","KEEP_CLASS","KEEP_SELECTORS","STRIP_OUTPUT_TAGS","REMOVE_ATTRS","REMOVE_ATTR_SELECTORS","map","selector","REMOVE_ATTR_LIST","join","WHITELIST_ATTRS","WHITELIST_ATTRS_RE","REMOVE_EMPTY_TAGS","REMOVE_EMPTY_SELECTORS","tag","CLEAN_CONDITIONALLY_TAGS","HEADER_TAGS","HEADER_TAG_LIST","UNLIKELY_CANDIDATES_BLACKLIST","UNLIKELY_CANDIDATES_WHITELIST","DIV_TO_P_BLOCK_TAGS","IS_WP_SELECTOR","BLOCK_LEVEL_TAGS","BLOCK_LEVEL_TAGS_RE","candidatesBlacklist","CANDIDATES_BLACKLIST","candidatesWhitelist","CANDIDATES_WHITELIST","brsToPs","$","collapsing","each","index","element","$element","nextElement","next","get","tagName","toLowerCase","remove","paragraphize","node","br","$node","sibling","nextSibling","p","test","appendTo","replaceWith","convertDivs","div","$div","convertable","children","length","convertSpans","span","$span","parents","convertNodeTo","attrs","getAttrs","attribString","key","html","browser","text","contents","cleanForHeight","$img","height","parseInt","attr","width","removeAttr","removeSpacers","stripJunkTags","article","tags","not","removeClass","removeAllButWhitelist","$article","find","reduce","acc","NON_TOP_CANDIDATE_TAGS","NON_TOP_CANDIDATE_TAGS_RE","HNEWS_CONTENT_SELECTORS","PHOTO_HINTS","PHOTO_HINTS_RE","POSITIVE_SCORE_HINTS","POSITIVE_SCORE_RE","READABILITY_ASSET","NEGATIVE_SCORE_HINTS","NEGATIVE_SCORE_RE","DIGIT_RE","BR_TAGS_RE","BR_TAG_RE","UNLIKELY_RE","PARAGRAPH_SCORE_TAGS","CHILD_CONTENT_TAGS","BAD_TAGS","HTML_OR_BODY_RE","getWeight","classes","id","score","getScore","parseFloat","scoreCommas","match","
idkRe","scoreLength","textLength","chunks","lengthBonus","Math","min","max","scoreParagraph","trim","slice","setScore","addScore","amount","getOrInitScore","e","addToParent","parent","weightNodes","scoreNode","addScoreTo","scorePs","$parent","rawScore","NORMALIZE_RE","normalizeSpaces","replace","PAGE_IN_HREF_RE","HAS_ALPHA_RE","IS_ALPHA_RE","IS_DIGIT_RE","isGoodSegment","segment","firstSegmentHasLetters","goodSegment","SENTENCE_END_RE","hasSentenceEnd","mergeSiblings","$candidate","topScore","siblingScoreThreshold","wrappingDiv","$sibling","siblingScore","append","contentBonus","density","linkDensity","newScore","siblingContent","siblingContentLength","first","removeUnlessContent","weight","hasClass","content","pCount","inputCount","contentLength","imgCount","nodeIsList","previousNode","prev","scriptCount","absolutize","rootUrl","$content","_","url","absoluteUrl","URL","resolve","makeLinksAbsolute","forEach","totalTextLength","linkText","linkLength","isGoodNode","maxChildren","withinComment","stripTags","cleanText","toArray","commentParent","nodeClass","class","classAndId","includes","undefined","attribs","attributes","name","value","setAttr","val","setAttribute","setAttrs","removeAttribute","ex","_interopDefault","require$$20","require$$19","require$$18","require$$17","require$$16","require$$15","require$$14","require$$13","require$$12","require$$11","require$$10","require$$9","require$$8","require$$7","require$$6","require$$5","require$$4","require$$3","require$$2","require$$1","require$$0","range","_regeneratorRuntime","mark","arguments","wrap","_context","start","end","stop","_marked","_ref","hostname","badUrl","error","messages","BAD_CONTENT_TYPES","options","reject","request","err","response","body","statusMessage","statusCode","parseNon2xx","headers","contentType","_response$headers","BAD_CONTENT_TYPES_RE","MAX_CONTENT_LENGTH","_asyncToGenerator","parsedUrl","_ref3","parse","encodeURI","_extends","REQUEST_HEADERS","timeout","FETCH_TIMEOUT","encoding","jar","g
zip","followAllRedirects","sent","validateResponse","abrupt","t0","Errors","_callee","_x2","_x3","apply","from","to","convertMetaProp","convertNodeTo$$1","brsToPs$$1","_Reflect$ownKeys","img","protocol","_URL$parse","concat","_toConsumableArray","addClass","$hOnes","_defineProperty","$p","NON_TOP_CANDIDATE_TAGS$1","PHOTO_HINTS$1","POSITIVE_SCORE_HINTS$1","NEGATIVE_SCORE_HINTS$1","UNLIKELY_CANDIDATES_BLACKLIST$1","UNLIKELY_CANDIDATES_WHITELIST$1","POSITIVE_SCORE_RE$1","NEGATIVE_SCORE_RE$1","PHOTO_HINTS_RE$1","READABILITY_ASSET$1","getOrInitScore$$1","addScore$$1","scoreNode$$1","addToParent$$1","_$node$get","PARAGRAPH_SCORE_TAGS$1","CHILD_CONTENT_TAGS$1","BAD_TAGS$1","convertSpans$1","HNEWS_CONTENT_SELECTORS$1","_slicedToArray","parentSelector","_ref2","childSelector","regexList","re","matchRe","exec","matches","pageNum","split","parsed","host","path","reverse","rawSegment","_segment$split2","_segment$split","possibleSegment","fileExt","push","cleanedSegments","words","NON_TOP_CANDIDATE_TAGS_RE$1","header","$header","prevAll","title","metaNames","cachedNames","filter","indexOf","_step","type","nodes","values","cleanTags$$1","metaValue","v","_getIterator","foundNames","_iteratorNormalCompletion","_iterator","done","_loop","_typeof","_ret","_didIteratorError","_iteratorError","return","withinComment$$1","selectors","textOnly","IS_LINK","IS_IMAGE","isComment","TAGS_TO_REMOVE","cleanComments","create","preparedResponse","validResponse","result","_this","generateDoc","cheerio","load","normalizeWhitespace","normalizeMetaTags","convertLazyLoadedImages","clean","extractor","domains","domain","supportedDomains","merge","transforms","noscript","author","date_published","h1","$children","_$children","dek","defaultCleaner","prepend","$tweetContainer","tweets","s","src","lead_image_url","next_page_url","excerpt","h2","youtubeId","JSON","data","sources","iframe","decodeURIComponent","ytRe","thumb","_thumb$match2","_thumb$match","clone","BloggerExtractor","NYMagExtractor","Wikipedi
aExtractor","TwitterExtractor","NYTimesExtractor","TheAtlanticExtractor","NewYorkerExtractor","WiredExtractor","MSNExtractor","YahooExtractor","BuzzfeedExtractor","WikiaExtractor","LittleThingsExtractor","PoliticoExtractor","DeadspinExtractor","BroadwayWorldExtractor","ApartmentTherapyExtractor","MediumExtractor","_Object$keys","CustomExtractors","mergeSupportedDomains","months","timestamp1","timestamp2","allMonths","CLEAN_AUTHOR_RE","leadImageUrl","validUrl","isWebUri","excerptContent","TEXT_LINK_RE","dekText","dateString","SPLIT_DATE_STRING","TIME_MERIDIAN_DOTS_RE","TIME_MERIDIAN_SPACE_RE","CLEAN_DATE_STRING_RE","MS_DATE_STRING","SEC_DATE_STRING","moment","date","isValid","cleanDateString","toISOString","_ref$cleanConditional","cleanConditionally","_ref$title","_ref$url","_ref$defaultCleaner","rewriteTopLevel$$1","cleanImages","markToKeep","cleanHOnes$$1","cleanHeaders","makeLinksAbsolute$$1","removeEmpty","cleanAttributes$$1","TITLE_SPLITTERS_RE","resolveSplitTitle","splitTitle","titleText","termCounts","_Reflect$ownKeys$redu2","_Reflect$ownKeys$redu","maxTerm","termCount","splitEnds","longestEnd","DOMAIN_ENDINGS_RE","wuzzy","levenshtein","startSlug","nakedDomain","startSlugRatio","endSlug","endSlugRatio","extractBreadcrumbTitle","newTitle","cleanDomainFromTitle","cleanAuthor","clean$1","cleanDek","cleanDatePublished","extractCleanNode","cleanTitle$$1","opts","stripUnlikelyCandidates","convertToParagraphs$$1","scoreContent$$1","findTopCandidate$$1","defaultOpts","extract","cheerio$$1","getContentNode","nodeIsSufficient","cleanAndReturnNode","k","extractBestNode","metaCache","extractFromMeta$$1","STRONG_TITLE_META_TAGS","extractFromSelectors$$1","STRONG_TITLE_SELECTORS","WEAK_TITLE_META_TAGS","WEAK_TITLE_SELECTORS","bylineRe","AUTHOR_META_TAGS","AUTHOR_MAX_LENGTH","AUTHOR_SELECTORS","BYLINE_SELECTORS_RE","_ref4","regex","abbrevMonthsStr","datePublished","DATE_PUBLISHED_META_TAGS","DATE_PUBLISHED_SELECTORS","extractFromUrl","DATE_PUBLISHED_URL_RES","POSITIVE_LEAD_I
MAGE_URL_HINTS","NEGATIVE_LEAD_IMAGE_URL_HINTS","POSITIVE_LEAD_IMAGE_URL_HINTS_RE","NEGATIVE_LEAD_IMAGE_URL_HINTS_RE","GIF_RE","JPG_RE","$figParent","$gParent","getSig","area","round","$imgs","LEAD_IMAGE_URL_META_TAGS","imageUrl","cleanUrl","imgs","scoreImageUrl","scoreAttr","scoreByParents","scoreBySibling","scoreByDimensions","scoreByPosition","imgScores","topUrl","LEAD_IMAGE_URL_SELECTORS","href","articleUrl","SequenceMatcher","ratio","similarity","diffPercent","diffModifier","linkTextAsNum","isWp","EXTRANEOUS_LINK_HINTS$1","EXTRANEOUS_LINK_HINTS_RE$1","$link","_Array$from","makeSig$1","positiveMatch","PAGE_RE","parentData","negativeMatch","linkData","PREV_LINK_TEXT_RE$1","baseUrl","previousUrls","linkHost","DIGIT_RE$2","fragment","baseRegex","NEXT_LINK_TEXT_RE$1","CAP_LINK_TEXT_RE$1","links","_ref$previousUrls","makeBaseRegex","isWordpress","possiblePages","link","removeAnchor","shouldScore","makeSig","pageNumFromUrl","scoreBaseUrl","scoreNextLinkText","scoreCapLinks","scorePrevLink","scoreByParents$1","scoreExtraneousLinks","scorePageInLink","scoreLinkText","scoreSimilarity","possiblePage","scoredPages","articleBaseUrl","scoreLinks","scoredLinks","scoredLink","topPage","parseDomain","$canonical","CANONICAL_META_SELECTORS","metaUrl","maxLength","ellipse","EXCERPT_META_SELECTORS","shortContent","GenericTitleExtractor","GenericDatePublishedExtractor","GenericAuthorExtractor","GenericContentExtractor","bind","GenericLeadImageUrlExtractor","GenericDekExtractor","GenericNextPageUrlExtractor","url_and_domain","GenericUrlExtractor","GenericExcerptExtractor","word_count","GenericWordCountExtractor","direction","getDirection","loaded","_url_and_domain","_parsedUrl","Extractors","baseDomain","GenericExtractor","$matches","Array","isArray","_selector","extractionOpts","_opts$extractHtml","extractHtml","_extractionOpts$defau","findMatchingSelector","matchingSelector","transformElements","cleanBySelectors","Cleaners","_matchingSelector","_opts$fallback","fallback","select","
contentOnly","_opts","extractedTitle","extractResult","_content","Extractor","extractorOpts","nextPageResult","pages","RootExtractor","total_pages","pages_rendered","_x","fetchAllPages","_result","_opts$fetchAllPages","validateUrl","getExtractor","rendered_pages","fetchResource","_context2","_callee2","_this2","Mercury","insertValues","strings","part","idx","toString","bodyPattern","trailingWhitespace","template","compiled","indentLevel","line","IGNORE","testFor","dir","file","questions","spinner","confirm","fn","args","msg","newParser","ora","then","savePage","r","succeed","confirmCreateDir","fs","existsSync","mkdirSync","getDir","scaffoldCustomParser","inquirer","prompt","answers","website","generateScaffold","extractorTemplate","extractorName","extractorTest","extractorTestTemplate","writeFileSync","appendFileSync","exportString","filename","Date","getTime","log","w","charAt","toUpperCase"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AACA,AAAO,IAAMA,YAAY,IAAIC,MAAJ,CAAW,gCAAX,EAA6C,GAA7C,CAAlB;;;;AAIP,AAAO,IAAMC,aAAa,qBAAnB;;AAEP,AAAO,IAAMC,iBAAiB,CAC5B,wCAD4B,EAE5B,uCAF4B,EAG5B,qCAH4B,EAI5B,oCAJ4B,CAAvB;;;AAQP,AAAO,IAAMC,oBAAoB,CAC/B,OAD+B,EAE/B,QAF+B,EAG/B,UAH+B,EAI/B,MAJ+B,EAK/B,OAL+B,EAM/B,IAN+B,EAO/B,OAP+B,EAQ/B,QAR+B,EAS/B,QAT+B,CAA1B;;;AAaP,AAAO,IAAMC,eAAe,CAAC,OAAD,EAAU,OAAV,CAArB;AACP,AAAO,IAAMC,wBAAwBD,aAAaE,GAAb,CAAiB;eAAgBC,QAAhB;CAAjB,CAA9B;AACP,AAAO,IAAMC,mBAAmBJ,aAAaK,IAAb,CAAkB,GAAlB,CAAzB;AACP,AAAO,IAAMC,kBAAkB,CAAC,KAAD,EAAQ,QAAR,EAAkB,MAAlB,EAA0B,OAA1B,EAAmC,IAAnC,EAAyC,KAAzC,CAAxB;AACP,AAAO,IAAMC,qBAAqB,IAAIX,MAAJ,QAAgBU,gBAAgBD,IAAhB,CAAqB,GAArB,CAAhB,SAA+C,GAA/C,CAA3B;;;AAGP,AAAO,IAAMG,oBAAoB,CAAC,GAAD,CAA1B;AACP,AAAO,IAAMC,yBAAyBD,kBAAkBN,GAAlB,CAAsB;SAAUQ,GAAV;CAAtB,EAA6CL,IAA7C,CAAkD,GAAlD,CAA/B;;;AAGP,AAAO,IAAMM,2BAA2B,CAAC,IAAD,EAAO,IAAP,EAAa,OAAb,EAAsB,KAAtB,EAA6B,QAA7B,EAAuC,MAAvC,EAA+CN,IAA/C,CAAoD,GAApD,CAAjC;;;AAGP,IAAMO,cAAc,CAAC,IAAD,EAAO,IAAP,EAAa,IAAb,EAAmB,IAAnB,EAAyB,IAAzB,CAApB;AACA,AAAO,IAAMC,kBAAkBD,YAAYP,IAAZ,CAAiB,G
AAjB,CAAxB;;;;;;;;AAQP,AAAO,IAAMS,gCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C;;AAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,UAvB2C,EAwB3C,OAxB2C,EAyB3C,YAzB2C,EA0B3C,UA1B2C;AA2B3C,2BA3B2C;AA4B3C,OA5B2C,EA6B3C,eA7B2C,EA8B3C,SA9B2C,EA+B3C,QA/B2C,EAgC3C,QAhC2C,EAiC3C,KAjC2C,EAkC3C,OAlC2C,EAmC3C,UAnC2C,EAoC3C,SApC2C,EAqC3C,UArC2C,EAsC3C,SAtC2C,EAuC3C,SAvC2C,EAwC3C,OAxC2C,CAAtC;;;;;;;;;;;;;AAsDP,AAAO,IAAMC,gCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,sBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjCX,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO;;AAeP,AAAO;;;;;AAMP,AAAO;;AASP,AAAO;AAMP,AAAO;;;;;;AAMP,AAAO;;;AAuBP,AAAO;;;AAGP,AAAO;;;;;;AAMP,AAAO;;AA0DP,AAAO;;;AAGP,AAAO,IAAMY,iBAAiB,wCAAvB;;;AAGP,AAAO;;;;AAIP,AAAO;AAgBP,AAAO;;;AAGP,AAAO;;;;;;AAMP,AAAO;;;;AAIP,AAAO;;;;AAIP,AAAO;;;AAGP,AAAO;;;AAGP,AAAO;;;;AAIP,AAAO,IAAMC,mBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,EAuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMC,sBAAsB,IAAIvB,MAAJ,QAAgBsB,iBAAiBb,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAMP,IAAMe,sBAAsBN,8BAA
8BT,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMgB,uBAAuB,IAAIzB,MAAJ,CAAWwB,mBAAX,EAAgC,GAAhC,CAA7B;;AAEP,IAAME,sBAAsBP,8BAA8BV,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMkB,uBAAuB,IAAI3B,MAAJ,CAAW0B,mBAAX,EAAgC,GAAhC,CAA7B,CAEP,AAAO,AAEP,AAAO,AACP,AAAO,AACP,AAAO,AAEP,AAAO;;ACrYP;;;;;;;;;AASA,AAAe,SAASE,UAAT,CAAiBC,CAAjB,EAAoB;MAC7BC,aAAa,KAAjB;IACE,IAAF,EAAQC,IAAR,CAAa,UAACC,KAAD,EAAQC,OAAR,EAAoB;QACzBC,WAAWL,EAAEI,OAAF,CAAjB;QACME,cAAcD,SAASE,IAAT,GAAgBC,GAAhB,CAAoB,CAApB,CAApB;;QAEIF,eAAeA,YAAYG,OAAZ,CAAoBC,WAApB,OAAsC,IAAzD,EAA+D;mBAChD,IAAb;eACSC,MAAT;KAFF,MAGO,IAAIV,UAAJ,EAAgB;mBACR,KAAb;;mBAEaG,OAAb,EAAsBJ,CAAtB,EAAyB,IAAzB;;GAVJ;;SAcOA,CAAP;;;ACzBF;;;;;;;;;;;AAWA,AAAe,SAASY,YAAT,CAAsBC,IAAtB,EAA4Bb,CAA5B,EAA2C;MAAZc,EAAY,uEAAP,KAAO;;MAClDC,QAAQf,EAAEa,IAAF,CAAd;;MAEIC,EAAJ,EAAQ;QACFE,UAAUH,KAAKI,WAAnB;QACMC,IAAIlB,EAAE,SAAF,CAAV;;;;WAIOgB,WAAW,EAAEA,QAAQP,OAAR,IAAmBf,oBAAoByB,IAApB,CAAyBH,QAAQP,OAAjC,CAArB,CAAlB,EAAmF;UAC3EQ,cAAcD,QAAQC,WAA5B;QACED,OAAF,EAAWI,QAAX,CAAoBF,CAApB;gBACUD,WAAV;;;UAGII,WAAN,CAAkBH,CAAlB;UACMP,MAAN;WACOX,CAAP;;;SAGKA,CAAP;;;AC7BF,SAASsB,WAAT,CAAqBtB,CAArB,EAAwB;IACpB,KAAF,EAASE,IAAT,CAAc,UAACC,KAAD,EAAQoB,GAAR,EAAgB;QACtBC,OAAOxB,EAAEuB,GAAF,CAAb;QACME,cAAcD,KAAKE,QAAL,CAAcnC,mBAAd,EAAmCoC,MAAnC,KAA8C,CAAlE;;QAEIF,WAAJ,EAAiB;uBACDD,IAAd,EAAoBxB,CAApB,EAAuB,GAAvB;;GALJ;;SASOA,CAAP;;;AAGF,SAAS4B,YAAT,CAAsB5B,CAAtB,EAAyB;IACrB,MAAF,EAAUE,IAAV,CAAe,UAACC,KAAD,EAAQ0B,IAAR,EAAiB;QACxBC,QAAQ9B,EAAE6B,IAAF,CAAd;QACMJ,cAAcK,MAAMC,OAAN,CAAc,QAAd,EAAwBJ,MAAxB,KAAmC,CAAvD;QACIF,WAAJ,EAAiB;uBACDK,KAAd,EAAqB9B,CAArB,EAAwB,GAAxB;;GAJJ;;SAQOA,CAAP;CAGF;;AC3Be,SAASgC,gBAAT,CAAuBjB,KAAvB,EAA8Bf,CAA9B,EAA4C;MAAXf,GAAW,uEAAL,GAAK;;MACnD4B,OAAOE,MAAMP,GAAN,CAAU,CAAV,CAAb;MACI,CAACK,IAAL,EAAW;WACFb,CAAP;;MAEIiC,QAAQC,SAASrB,IAAT,KAAkB,EAAhC;;;MAGMsB,eAAe,iBAAgBF,KAAhB,EACQxD,GADR,CACY;WAAU2D,GAAV,SAAiBH,MAAMG,GAAN,CAAjB;GADZ,EAEQxD,IAFR,CAEa,GAFb,CAArB;MAGIyD,aAAJ;;MAEIrC,EAAEsC,OAAN,EAAe;;;;WAINzB,KAAKJ,OAAL,CAAaC,WAAb,OAA+B,UAA/B,GAA4CK,MAAMwB,IAAN,EAA5C,GAA2Dx
B,MAAMsB,IAAN,EAAlE;GAJF,MAKO;WACEtB,MAAMyB,QAAN,EAAP;;QAEInB,WAAN,OACMpC,GADN,SACakD,YADb,SAC6BE,IAD7B,UACsCpD,GADtC;SAGOe,CAAP;;;ACxBF,SAASyC,cAAT,CAAwBC,IAAxB,EAA8B1C,CAA9B,EAAiC;MACzB2C,SAASC,SAASF,KAAKG,IAAL,CAAU,QAAV,CAAT,EAA8B,EAA9B,CAAf;MACMC,QAAQF,SAASF,KAAKG,IAAL,CAAU,OAAV,CAAT,EAA6B,EAA7B,KAAoC,EAAlD;;;;;MAKI,CAACF,UAAU,EAAX,IAAiB,EAAjB,IAAuBG,QAAQ,EAAnC,EAAuC;SAChCnC,MAAL;GADF,MAEO,IAAIgC,MAAJ,EAAY;;;;SAIZI,UAAL,CAAgB,QAAhB;;;SAGK/C,CAAP;;;;;AAKF,SAASgD,aAAT,CAAuBN,IAAvB,EAA6B1C,CAA7B,EAAgC;MAC1B9B,UAAUiD,IAAV,CAAeuB,KAAKG,IAAL,CAAU,KAAV,CAAf,CAAJ,EAAsC;SAC/BlC,MAAL;;;SAGKX,CAAP;CAGF;;AC1Be,SAASiD,aAAT,CAAuBC,OAAvB,EAAgClD,CAAhC,EAA8C;MAAXmD,IAAW,uEAAJ,EAAI;;MACvDA,KAAKxB,MAAL,KAAgB,CAApB,EAAuB;WACdrD,iBAAP;;;;;IAKA6E,KAAKvE,IAAL,CAAU,GAAV,CAAF,EAAkBsE,OAAlB,EAA2BE,GAA3B,OAAmChF,UAAnC,EAAiDuC,MAAjD;;;UAGMvC,UAAN,EAAoB8E,OAApB,EAA6BG,WAA7B,CAAyCjF,UAAzC;;SAEO4B,CAAP;;;ACVF,SAASsD,qBAAT,CAA+BC,QAA/B,EAAyC;WAC9BC,IAAT,CAAc,GAAd,EAAmBtD,IAAnB,CAAwB,UAACC,KAAD,EAAQU,IAAR,EAAiB;QACjCoB,QAAQC,SAASrB,IAAT,CAAd;;aAESA,IAAT,EAAe,iBAAgBoB,KAAhB,EAAuBwB,MAAvB,CAA8B,UAACC,GAAD,EAAMb,IAAN,EAAe;UACtD/D,mBAAmBqC,IAAnB,CAAwB0B,IAAxB,CAAJ,EAAmC;4BACrBa,GAAZ,sBAAkBb,IAAlB,EAAyBZ,MAAMY,IAAN,CAAzB;;;aAGKa,GAAP;KALa,EAMZ,EANY,CAAf;GAHF;;SAYOH,QAAP;CAGF;;ACvBA;;;;;;AAMA,AAAO,IAAMlE,kCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C,EAe3C,MAf2C,EAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,OAvB2C,EAwB3C,YAxB2C,EAyB3C,UAzB2C;AA0B3C,2BA1B2C;AA2B3C,OA3B2C,EA4B3C,eA5B2C,EA6B3C,SA7B2C,EA8B3C,QA9B2C,EA+B3C,QA/B2C,EAgC3C,KAhC2C,EAiC3C,OAjC2C,EAkC3C,UAlC2C,EAmC3C,SAnC2C,EAoC3C,UApC2C,EAqC3C,SArC2C,EAsC3C,OAtC2C,CAAtC;;;;;;;;;;;;;AAoDP,AAAO,IAAMC,kCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb
2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,wBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjCX,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAM+E,2BAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,8BACX,IAAIzF,MAAJ,QAAgBwF,yBAAuB/E,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;;;;AAMP,AAAO,IAAMiF,4BAA0B,CACrC,CAAC,SAAD,EAAY,gBAAZ,CADqC,EAErC,CAAC,OAAD,EAAU,gBAAV,CAFqC,EAGrC,CAAC,QAAD,EAAW,gBAAX,CAHqC,EAIrC,CAAC,OAAD,EAAU,WAAV,CAJqC,EAKrC,CAAC,OAAD,EAAU,YAAV,CALqC,EAMrC,CAAC,OAAD,EAAU,YAAV,CANqC,CAAhC;;AASP,AAAO,IAAMC,gBAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,mBAAiB,IAAI5F,MAAJ,CAAW2F,cAAYlF,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAMP,AAAO,IAAMoF,yBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,sBAAoB,IAAI9F,MAAJ,CAAW6F,uBAAqBpF,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAMsF,sBAAoB,IAAI/F,MAAJ,CAAW,qBAAX,EAAkC,GAAlC,CAA1B;;;;;;AAMP,AAAO,IAAMgG,yBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnD
kC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,sBAAoB,IAAIjG,MAAJ,CAAWgG,uBAAqBvF,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,AAAMyF;;;AAGb,AAAO,AAAMC;;;AAGb,AAAO,AAAMC;;;;AAIb,AAAO,AAAM9E;AAiDb,AAAO,AAAMC,AAAsCD;;;;;;AAMnD,IAAME,wBAAsBN,gCAA8BT,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMgB,AAAkCD,AAAX;;AAEpC,IAAME,wBAAsBP,gCAA8BV,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMkB,AAAkCD,AAAX;;AAEpC,AAAO,AAAM2E,AAA8B3E,AAAhB,AAAyCF,AAAzC;;AAE3B,AAAO,IAAM8E,yBAAuB,IAAItG,MAAJ,CAAW,mBAAX,EAAgC,GAAhC,CAA7B;AACP,AAAO,IAAMuG,uBAAqB,IAAIvG,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAA3B;AACP,AAAO,IAAMwG,aAAW,IAAIxG,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB,CAEP,AAAO,AAAMyG;;ACzSb;AACA,AAAe,SAASC,SAAT,CAAmBhE,IAAnB,EAAyB;MAChCiE,UAAUjE,KAAKgC,IAAL,CAAU,OAAV,CAAhB;MACMkC,KAAKlE,KAAKgC,IAAL,CAAU,IAAV,CAAX;MACImC,QAAQ,CAAZ;;MAEID,EAAJ,EAAQ;;QAEFd,oBAAkB9C,IAAlB,CAAuB4D,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;QAEEX,oBAAkBjD,IAAlB,CAAuB4D,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;;;MAIAD,OAAJ,EAAa;QACPE,UAAU,CAAd,EAAiB;;;UAGXf,oBAAkB9C,IAAlB,CAAuB2D,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;UAEEV,oBAAkBjD,IAAlB,CAAuB2D,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;;;;;;QAOAf,iBAAe5C,IAAf,CAAoB2D,OAApB,CAAJ,EAAkC;eACvB,EAAT;;;;;;;QAOEZ,oBAAkB/C,IAAlB,CAAuB2D,OAAvB,CAAJ,EAAqC;eAC1B,EAAT;;;;SAIGE,KAAP;;;ACnDF;;;AAGA,AAAe,SAASC,QAAT,CAAkBlE,KAAlB,EAAyB;;SAE/BmE,WAAWnE,MAAM8B,IAAN,CAAW,OAAX,CAAX,KAAmC,IAA1C;;;ACLF;AACA,AAAe,SAASsC,WAAT,CAAqB5C,IAArB,EAA2B;SACjC,CAACA,KAAK6C,KAAL,CAAW,IAAX,KAAoB,EAArB,EAAyBzD,MAAhC;;;ACFF,IAAM0D,QAAQ,IAAIlH,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAd;;AAEA,AAAe,SAASmH,WAAT,CAAqBC,UAArB,EAAgD;MAAf9E,OAAe,uEAAL,GAAK;;MACvD+E,SAASD,aAAa,EAA5B;;MAEIC,SAAS,CAAb,EAAgB;QACVC,oBAAJ;;;;;;;QAOIJ,MAAMlE,IAAN,CAAWV,OAAX,CAAJ,EAAyB;oBACT+E,SAAS,CAAvB;KADF,MAEO;oBACSA,SAAS,IAAvB;;;WAGKE,KAAKC,GAAL,CAASD,KAAKE,GAAL,CAASH,WAAT,EAAsB,CAAtB,CAAT,EAAmC,CAAnC,CAAP;;;SAGK,CAAP;;;ACjBF;;AAEA,AAAe,SAASI,iBAAT,CAAwBhF,IAAxB,EAA8B;MACvCmE,QAAQ,CAAZ;MACMzC,OAAO1B,KAAK0B,IAAL,GAAYuD,IAAZ,EAAb;MACMP,aAAahD,KAAKZ,MAAxB;;;MAGI4D,aAAa,
EAAjB,EAAqB;WACZ,CAAP;;;;WAIOJ,YAAY5C,IAAZ,CAAT;;;;WAIS+C,YAAYC,UAAZ,CAAT;;;;;;MAMIhD,KAAKwD,KAAL,CAAW,CAAC,CAAZ,MAAmB,GAAvB,EAA4B;aACjB,CAAT;;;SAGKf,KAAP;;;AChCa,SAASgB,QAAT,CAAkBjF,KAAlB,EAAyBf,CAAzB,EAA4BgF,KAA5B,EAAmC;QAC1CnC,IAAN,CAAW,OAAX,EAAoBmC,KAApB;SACOjE,KAAP;;;ACGa,SAASkF,WAAT,CAAkBlF,KAAlB,EAAyBf,CAAzB,EAA4BkG,MAA5B,EAAoC;MAC7C;QACIlB,QAAQmB,kBAAepF,KAAf,EAAsBf,CAAtB,IAA2BkG,MAAzC;aACSnF,KAAT,EAAgBf,CAAhB,EAAmBgF,KAAnB;GAFF,CAGE,OAAOoB,CAAP,EAAU;;;;SAILrF,KAAP;;;ACXF;AACA,AAAe,SAASsF,cAAT,CAAqBxF,IAArB,EAA2Bb,CAA3B,EAA8BgF,KAA9B,EAAqC;MAC5CsB,SAASzF,KAAKyF,MAAL,EAAf;MACIA,MAAJ,EAAY;gBACDA,MAAT,EAAiBtG,CAAjB,EAAoBgF,QAAQ,IAA5B;;;SAGKnE,IAAP;;;ACFF;;;AAGA,AAAe,SAASsF,iBAAT,CAAwBpF,KAAxB,EAA+Bf,CAA/B,EAAsD;MAApBuG,WAAoB,uEAAN,IAAM;;MAC/DvB,QAAQC,SAASlE,KAAT,CAAZ;;MAEIiE,KAAJ,EAAW;WACFA,KAAP;;;UAGMwB,aAAUzF,KAAV,CAAR;;MAEIwF,WAAJ,EAAiB;aACN1B,UAAU9D,KAAV,CAAT;;;iBAGUA,KAAZ,EAAmBf,CAAnB,EAAsBgF,KAAtB;;SAEOA,KAAP;;;AClBF;;AAEA,AAAe,SAASwB,YAAT,CAAmBzF,KAAnB,EAA0B;mBACnBA,MAAMP,GAAN,CAAU,CAAV,CADmB;MAC/BC,OAD+B,cAC/BA,OAD+B;;;;;;;MAMnCgE,uBAAqBtD,IAArB,CAA0BV,OAA1B,CAAJ,EAAwC;WAC/BoF,kBAAe9E,KAAf,CAAP;GADF,MAEO,IAAIN,QAAQC,WAAR,OAA0B,KAA9B,EAAqC;WACnC,CAAP;GADK,MAEA,IAAIgE,qBAAmBvD,IAAnB,CAAwBV,OAAxB,CAAJ,EAAsC;WACpC,CAAP;GADK,MAEA,IAAIkE,WAASxD,IAAT,CAAcV,OAAd,CAAJ,EAA4B;WAC1B,CAAC,CAAR;GADK,MAEA,IAAIA,QAAQC,WAAR,OAA0B,IAA9B,EAAoC;WAClC,CAAC,CAAR;;;SAGK,CAAP;;;ACjBF,SAASkB,cAAT,CAAsBb,KAAtB,EAA6Bf,CAA7B,EAAgC;MAC1Be,MAAMP,GAAN,CAAU,CAAV,CAAJ,EAAkB;qBACIO,MAAMP,GAAN,CAAU,CAAV,CADJ;QACRC,OADQ,cACRA,OADQ;;QAGZA,YAAY,MAAhB,EAAwB;;uBAERM,KAAd,EAAqBf,CAArB,EAAwB,KAAxB;;;;;AAKN,SAASyG,UAAT,CAAoB1F,KAApB,EAA2Bf,CAA3B,EAA8BgF,KAA9B,EAAqC;MAC/BjE,KAAJ,EAAW;mBACIA,KAAb,EAAoBf,CAApB;gBACSe,KAAT,EAAgBf,CAAhB,EAAmBgF,KAAnB;;;;AAIJ,SAAS0B,OAAT,CAAiB1G,CAAjB,EAAoBuG,WAApB,EAAiC;IAC7B,QAAF,EAAYnD,GAAZ,CAAgB,SAAhB,EAA2BlD,IAA3B,CAAgC,UAACC,KAAD,EAAQU,IAAR,EAAiB;;;QAG3CE,QAAQf,EAAEa,IAAF,CAAZ;YACQmF,SAASjF,KAAT,EAAgBf,CAAhB,EAAmBmG,kBAAepF,KAAf,EAAsBf,CAAtB,EAAyBuG,WAAzB,CAAnB,
CAAR;;QAEMI,UAAU5F,MAAMuF,MAAN,EAAhB;QACMM,WAAWJ,aAAUzF,KAAV,CAAjB;;eAEW4F,OAAX,EAAoB3G,CAApB,EAAuB4G,QAAvB,EAAiCL,WAAjC;QACII,OAAJ,EAAa;;;iBAGAA,QAAQL,MAAR,EAAX,EAA6BtG,CAA7B,EAAgC4G,WAAW,CAA3C,EAA8CL,WAA9C;;GAbJ;;SAiBOvG,CAAP;CAGF;;ACjDA,IAAM6G,eAAe,SAArB;;AAEA,AAAe,SAASC,eAAT,CAAyBvE,IAAzB,EAA+B;SACrCA,KAAKwE,OAAL,CAAaF,YAAb,EAA2B,GAA3B,EAAgCf,IAAhC,EAAP;;;ACHF;;;;0CAKA;;ACLA;;;;;;;;;;;;;;;;AAgBA,AAAO,IAAMkB,kBAAkB,IAAI7I,MAAJ,CAAW,0EAAX,EAAuF,GAAvF,CAAxB;;AAEP,AAAO,IAAM8I,eAAe,QAArB;;AAEP,AAAO,IAAMC,cAAc,WAApB;AACP,AAAO,IAAMC,cAAc,WAApB;;ACZP,SAASC,aAAT,CAAuBC,OAAvB,EAAgClH,KAAhC,EAAuCmH,sBAAvC,EAA+D;MACzDC,cAAc,IAAlB;;;;MAIIpH,QAAQ,CAAR,IAAagH,YAAYhG,IAAZ,CAAiBkG,OAAjB,CAAb,IAA0CA,QAAQ1F,MAAR,GAAiB,CAA/D,EAAkE;kBAClD,IAAd;;;;;MAKExB,UAAU,CAAV,IAAekH,QAAQ3G,WAAR,OAA0B,OAA7C,EAAsD;kBACtC,KAAd;;;;;MAKEP,QAAQ,CAAR,IAAakH,QAAQ1F,MAAR,GAAiB,CAA9B,IAAmC,CAAC2F,sBAAxC,EAAgE;kBAChD,KAAd;;;SAGKC,WAAP;CAGF;;ACjCA;;AAEA,IAAMC,kBAAkB,IAAIrJ,MAAJ,CAAW,QAAX,CAAxB;AACA,AAAe,SAASsJ,cAAT,CAAwBlF,IAAxB,EAA8B;SACpCiF,gBAAgBrG,IAAhB,CAAqBoB,IAArB,CAAP;;;ACKF;;;;;AAKA,AAAe,SAASmF,aAAT,CAAuBC,UAAvB,EAAmCC,QAAnC,EAA6C5H,CAA7C,EAAgD;MACzD,CAAC2H,WAAWrB,MAAX,GAAoB3E,MAAzB,EAAiC;WACxBgG,UAAP;;;MAGIE,wBAAwBnC,KAAKE,GAAL,CAAS,EAAT,EAAagC,WAAW,IAAxB,CAA9B;MACME,cAAc9H,EAAE,aAAF,CAApB;;aAEWsG,MAAX,GAAoB5E,QAApB,GAA+BxB,IAA/B,CAAoC,UAACC,KAAD,EAAQa,OAAR,EAAoB;QAChD+G,WAAW/H,EAAEgB,OAAF,CAAjB;;QAEI4C,4BAA0BzC,IAA1B,CAA+BH,QAAQP,OAAvC,CAAJ,EAAqD;aAC5C,IAAP;;;QAGIuH,eAAe/C,SAAS8C,QAAT,CAArB;QACIC,YAAJ,EAAkB;UACZD,SAASvH,GAAT,CAAa,CAAb,MAAoBmH,WAAWnH,GAAX,CAAe,CAAf,CAAxB,EAA2C;oBAC7ByH,MAAZ,CAAmBF,QAAnB;OADF,MAEO;YACDG,eAAe,CAAnB;YACMC,UAAUC,YAAYL,QAAZ,CAAhB;;;;YAIII,UAAU,IAAd,EAAoB;0BACF,EAAhB;;;;;YAKEA,WAAW,GAAf,EAAoB;0BACF,EAAhB;;;;;YAKEJ,SAASlF,IAAT,CAAc,OAAd,MAA2B8E,WAAW9E,IAAX,CAAgB,OAAhB,CAA/B,EAAyD;0BACvC+E,WAAW,GAA3B;;;YAGIS,WAAWL,eAAeE,YAAhC;;YAEIG,YAAYR,qBAAhB,EAAuC;iBAC9BC,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;SADF,MAEO,IAAI/G,QAAQP,OAAR,KAAoB,GAAxB,EAA6B;cAC5B6H,iBAAiBP,SAASxF,IAAT,EAAvB;cACM
gG,uBAAuBhD,WAAW+C,cAAX,CAA7B;;cAEIC,uBAAuB,EAAvB,IAA6BJ,UAAU,IAA3C,EAAiD;mBACxCL,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;WADF,MAEO,IAAIQ,wBAAwB,EAAxB,IAA8BJ,YAAY,CAA1C,IACDV,eAAea,cAAf,CADH,EACmC;mBACjCR,YAAYG,MAAZ,CAAmBF,QAAnB,CAAP;;;;;;WAMD,IAAP;GAnDF;;MAsDID,YAAYpG,QAAZ,GAAuBC,MAAvB,KAAkC,CAAlC,IACFmG,YAAYpG,QAAZ,GAAuB8G,KAAvB,GAA+BhI,GAA/B,CAAmC,CAAnC,MAA0CmH,WAAWnH,GAAX,CAAe,CAAf,CAD5C,EAC+D;WACtDmH,UAAP;;;SAGKG,WAAP;;;ACjFF,UACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACDA,SAASW,mBAAT,CAA6B1H,KAA7B,EAAoCf,CAApC,EAAuC0I,MAAvC,EAA+C;;;;;MAKzC3H,MAAM4H,QAAN,CAAe,qBAAf,CAAJ,EAA2C;;;;MAIrCC,UAAU9B,gBAAgB/F,MAAMwB,IAAN,EAAhB,CAAhB;;MAEI4C,YAAYyD,OAAZ,IAAuB,EAA3B,EAA+B;QACvBC,SAAS7I,EAAE,GAAF,EAAOe,KAAP,EAAcY,MAA7B;QACMmH,aAAa9I,EAAE,OAAF,EAAWe,KAAX,EAAkBY,MAArC;;;QAGImH,aAAcD,SAAS,CAA3B,EAA+B;YACvBlI,MAAN;;;;QAIIoI,gBAAgBH,QAAQjH,MAA9B;QACMqH,WAAWhJ,EAAE,KAAF,EAASe,KAAT,EAAgBY,MAAjC;;;;QAIIoH,gBAAgB,EAAhB,IAAsBC,aAAa,CAAvC,EAA0C;YAClCrI,MAAN;;;;QAIIwH,UAAUC,YAAYrH,KAAZ,CAAhB;;;;;QAKI2H,SAAS,EAAT,IAAeP,UAAU,GAAzB,IAAgCY,gBAAgB,EAApD,EAAwD;YAChDpI,MAAN;;;;;;QAME+H,UAAU,EAAV,IAAgBP,UAAU,GAA9B,EAAmC;;;;UAI3B1H,UAAUM,MAAMP,GAAN,CAAU,CAAV,EAAaC,OAAb,CAAqBC,WAArB,EAAhB;UACMuI,aAAaxI,YAAY,IAAZ,IAAoBA,YAAY,IAAnD;UACIwI,UAAJ,EAAgB;YACRC,eAAenI,MAAMoI,IAAN,EAArB;YACID,gBAAgBpC,gBAAgBoC,aAAa3G,IAAb,EAAhB,EAAqCwD,KAArC,CAA2C,CAAC,CAA5C,MAAmD,GAAvE,EAA4E;;;;;YAKxEpF,MAAN;;;;QAIIyI,cAAcpJ,EAAE,QAAF,EAAYe,KAAZ,EAAmBY,MAAvC;;;QAGIyH,cAAc,CAAd,IAAmBL,gBAAgB,GAAvC,EAA4C;YACpCpI,MAAN;;;;CAMN;;ACjFA;AACA,AAOA,SAAS0I,UAAT,CAAoBrJ,CAApB,EAAuBsJ,OAAvB,EAAgCzG,IAAhC,EAAsC0G,QAAtC,EAAgD;UACxC1G,IAAN,QAAe0G,QAAf,EAAyBrJ,IAAzB,CAA8B,UAACsJ,CAAD,EAAI3I,IAAJ,EAAa;QACnCoB,QAAQC,SAASrB,IAAT,CAAd;QACM4I,MAAMxH,MAAMY,IAAN,CAAZ;;QAEI4G,GAAJ,EAAS;UACDC,cAAcC,IAAIC,OAAJ,CAAYN,OAAZ,EAAqBG,GAArB,CAApB;cACQ5I,IAAR,EAAcgC,IAAd,EAAoB6G,WAApB;;GANJ;;;AAWF,AAAe,SAASG,oBAAT,CAA2BN,QAA3B,EAAqCvJ,CAArC,EAAwCyJ,GAAxC,EAA6C;GACzD,MAAD,EAAS,KAAT,EAAgBK,OAAhB,CAAwB;WAAQT,WAAWrJ,CAAX,EAAcyJ,GAAd,EAAmB5G,IAAnB,E
AAyB0G,QAAzB,CAAR;GAAxB;;SAEOA,QAAP;;;ACvBK,SAAShE,UAAT,CAAoBhD,IAApB,EAA0B;SACxBA,KAAKuD,IAAL,GACKiB,OADL,CACa,MADb,EACqB,GADrB,EAEKpF,MAFZ;;;;;;AAQF,AAAO,SAASyG,WAAT,CAAqBrH,KAArB,EAA4B;MAC3BgJ,kBAAkBxE,WAAWxE,MAAMwB,IAAN,EAAX,CAAxB;;MAEMyH,WAAWjJ,MAAMyC,IAAN,CAAW,GAAX,EAAgBjB,IAAhB,EAAjB;MACM0H,aAAa1E,WAAWyE,QAAX,CAAnB;;MAEID,kBAAkB,CAAtB,EAAyB;WAChBE,aAAaF,eAApB;GADF,MAEO,IAAIA,oBAAoB,CAApB,IAAyBE,aAAa,CAA1C,EAA6C;WAC3C,CAAP;;;SAGK,CAAP;;;ACnBF,SAASC,UAAT,CAAoBnJ,KAApB,EAA2BoJ,WAA3B,EAAwC;;;MAGlCpJ,MAAMW,QAAN,GAAiBC,MAAjB,GAA0BwI,WAA9B,EAA2C;WAClC,KAAP;;;MAGEC,iBAAcrJ,KAAd,CAAJ,EAA0B;WACjB,KAAP;;;SAGK,IAAP;CAGF;;AChBA;AACA,AAAe,SAASsJ,SAAT,CAAmB9H,IAAnB,EAAyBvC,CAAzB,EAA4B;;;MAGnCsK,YAAYtK,aAAWuC,IAAX,cAA0BA,IAA1B,EAAlB;SACO+H,cAAc,EAAd,GAAmB/H,IAAnB,GAA0B+H,SAAjC;;;ACHa,SAASF,gBAAT,CAAuBrJ,KAAvB,EAA8B;MACrCgB,UAAUhB,MAAMgB,OAAN,GAAgBwI,OAAhB,EAAhB;MACMC,gBAAgBzI,QAAQyB,IAAR,CAAa,UAAC8C,MAAD,EAAY;QACvCrE,QAAQC,SAASoE,MAAT,CAAd;QACemE,SAF8B,GAEZxI,KAFY,CAErCyI,KAFqC;QAEnB3F,EAFmB,GAEZ9C,KAFY,CAEnB8C,EAFmB;;QAGvC4F,aAAgBF,SAAhB,SAA6B1F,EAAnC;WACO4F,WAAWC,QAAX,CAAoB,SAApB,CAAP;GAJoB,CAAtB;;SAOOJ,kBAAkBK,SAAzB;;;ACXF;;kBAIA;;ACJe,SAAS3I,QAAT,CAAkBrB,IAAlB,EAAwB;MAC7BiK,OAD6B,GACLjK,IADK,CAC7BiK,OAD6B;MACpBC,UADoB,GACLlK,IADK,CACpBkK,UADoB;;;MAGjC,CAACD,OAAD,IAAYC,UAAhB,EAA4B;QACpB9I,QAAQ,iBAAgB8I,UAAhB,EAA4BtH,MAA5B,CAAmC,UAACC,GAAD,EAAMvD,KAAN,EAAgB;UACzD0C,OAAOkI,WAAW5K,KAAX,CAAb;;UAEI,CAAC0C,KAAKmI,IAAN,IAAc,CAACnI,KAAKoI,KAAxB,EAA+B,OAAOvH,GAAP;;UAE3Bb,KAAKmI,IAAT,IAAiBnI,KAAKoI,KAAtB;aACOvH,GAAP;KANY,EAOX,EAPW,CAAd;WAQOzB,KAAP;;;SAGK6I,OAAP;;;ACfa,SAASI,OAAT,CAAiBrK,IAAjB,EAAuBgC,IAAvB,EAA6BsI,GAA7B,EAAkC;MAC3CtK,KAAKiK,OAAT,EAAkB;SACXA,OAAL,CAAajI,IAAb,IAAqBsI,GAArB;GADF,MAEO,IAAItK,KAAKkK,UAAT,EAAqB;SACrBK,YAAL,CAAkBvI,IAAlB,EAAwBsI,GAAxB;;;SAGKtK,IAAP;;;ACPF;AACA,AAAe,SAASwK,QAAT,CAAkBxK,IAAlB,EAAwBoB,KAAxB,EAA+B;MACxCpB,KAAKiK,OAAT,EAAkB;SACXA,OAAL,GAAe7I,KAAf;GADF,MAEO,IAAIpB,KAAKkK,UAAT,EAAqB;WACpBlK,KAAKkK,UAAL,CAAgBpJ,MAAhB,GAAyB,CAA/B;WACO2J,eAAL,CAAqB
zK,KAAKkK,UAAL,CAAgB,CAAhB,EAAmBC,IAAxC;KAEF,iBAAgB/I,KAAhB,EAAuB6H,OAAvB,CAA+B,eAAO;WAC/BsB,YAAL,CAAkBhJ,GAAlB,EAAuBH,MAAMG,GAAN,CAAvB;KADF;;;SAKKvB,IAAP;;;ACbF,mBACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACvBA,0BAAA,CAA0B0K,EAA1B,CAA8B,CAAE,WAAe,UAAA,mCAAOA,EAAP,KAAc,QAArB,EAAkC,cAAnC,CAAsDA,GAAG,SAAH,CAAtD,CAAsEA,EAA7E,CAAkF,CAElH,wBAA0BC,kBAAgBC,WAAhB,CAA1B,CACA,eAAeD,kBAAgBE,QAAhB,CAAf,CACA,sBAAwBF,kBAAgBG,gBAAhB,CAAxB,CACA,UAAUH,kBAAgBI,GAAhB,CAAV,CACA,cAAcJ,kBAAgBK,OAAhB,CAAd,CACA,aAAeL,kBAAgBM,OAAhB,CAAf,CACA,cAAcN,kBAAgBO,OAAhB,CAAd,CACA,uBAAuBP,kBAAgBQ,gBAAhB,CAAvB,CACA,yBAAyBR,kBAAgBS,kBAAhB,CAAzB,CACA,sBAAsBT,kBAAgBU,eAAhB,CAAtB,CACA,qBAAqBV,kBAAgBW,cAAhB,CAArB,CACA,cAAcX,kBAAgBY,OAAhB,CAAd,CACA,mBAAmBZ,kBAAgBa,YAAhB,CAAnB,CACA,iBAAmBb,kBAAgBc,IAAhB,CAAnB,CACA,sBAAsBd,kBAAgBe,eAAhB,CAAtB,CACA,eAAef,kBAAgBgB,QAAhB,CAAf,CACA,aAAahB,kBAAgBiB,MAAhB,CAAb,CACA,YAAYjB,kBAAgBkB,KAAhB,CAAZ,CACA,cAAclB,kBAAgBmB,OAAhB,CAAd,CACA,gBAAkBnB,kBAAgBoB,IAAhB,CAAlB,CACA,gBAAgBpB,kBAAgBqB,SAAhB,CAAhB,CAEA,YAAc,CAACC,KAAD,EAAQrO,GAAR,CAAYsO,oBAAoBC,IAAhC,CAAd,CAEA,cAAA,EAAiB,CACf,UAAYC,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,CAAhF,CACA,QAAUA,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,CAA9E,CACA,2BAA2BC,IAApB,CAAyB,eAAA,CAAgBC,QAAhB,CAA0B,CACxD,MAAO,CAAP,CAAU,CACR,OAAQA,SAAShE,IAAT,CAAgBgE,SAAS5M,IAAjC,EACE,MAAA,CACE,GAAI,EAAE6M,OAASC,GAAX,CAAJ,CAAqB,CACnBF,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MACD,CAED4M,SAAS5M,IAAT,CAAgB,CAAhB,CACA,cAAgB,CAAhB,CAEF,MAAA,CACE4M,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MAEF,MAAA,CACA,IAAK,KAAL,CACE,gBAAgB+M,IAAT,EAAP,CAhBJ,CAkBD,CACF,CArBM,CAqBJC,QAAQ,CAAR,CArBI,CAqBQ,IArBR,CAAP,CAsBD;AAGD,oBAAA,CAAqBC,IAArB,CAA2B,CACzB,aAAeA,KAAKC,QAApB;AAGA,MAAO,CAAC,CAACA,QAAT,CACD,CAED,WAAa,CACXC,OAAQ,CACNC,MAAO,IADD,CAENC,SAAU,gGAFJ,CADG,CAAb,CAOA,oBAAsB,CACpB,aAAc,6CADM,CAAtB;AAKA,kBAAoB,KAApB;AAGA,sBAAw
B,CAAC,YAAD,CAAe,WAAf,CAA4B,YAA5B,CAA0C,WAA1C,CAAxB,CAEA,yBAA2B,UAAA,CAAW,KAAOC,kBAAkBjP,IAAlB,CAAuB,GAAvB,CAAP,CAAqC,IAAhD,CAAsD,GAAtD,CAA3B;;AAIA,uBAAyB,OAAzB;;;AAMA,YAAA,CAAakP,OAAb,CAAsB;AAEpB,mBAAO,CAAa,SAAUlE,OAAV,CAAmBmE,MAAnB,CAA2B,CAC7CC,UAAQF,OAAR,CAAiB,SAAUG,GAAV,CAAeC,QAAf,CAAyBC,IAAzB,CAA+B,CAC9C,GAAIF,GAAJ,CAAS,CACPF,OAAOE,GAAP,EACD,CAFD,IAEO,CACLrE,QAAQ,CAAEuE,KAAMA,IAAR,CAAcD,SAAUA,QAAxB,CAAR,EACD,CACF,CAND,EAOD,CARM,CAAP,CASD;;;;AAOD,yBAAA,CAA0BA,QAA1B,CAAoC,CAClC,gBAAkBjB,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,KAAtF;;;;;;AAQA,GAAIiB,SAASE,aAAT,EAA0BF,SAASE,aAAT,GAA2B,IAArD,EAA6DF,SAASG,UAAT,GAAwB,GAAzF,CAA8F,CAC5F,GAAI,CAACH,SAASG,UAAd,CAA0B,CACxB,eAAM,CAAU,mDAAqDH,SAASP,KAAxE,CAAN,CACD,CAFD,QAEW,CAACW,WAAL,CAAkB,CACvB,eAAM,CAAU,+CAAiDJ,SAASG,UAA1D,CAAuE,oEAAjF,CAAN,CACD,CACF,CAED,sBAAwBH,SAASK,OAAjC,CACIC,YAAcC,kBAAkB,cAAlB,CADlB,CAEI1F,cAAgB0F,kBAAkB,gBAAlB,CAFpB;AAMA,GAAIC,qBAAqBvN,IAArB,CAA0BqN,WAA1B,CAAJ,CAA4C,CAC1C,eAAM,CAAU,sCAAwCA,WAAxC,CAAsD,sBAAhE,CAAN,CACD;AAGD,GAAIzF,cAAgB4F,kBAApB,CAAwC,CACtC,eAAM,CAAU,sEAAwEA,kBAAxE,CAA6F,GAAvG,CAAN,CACD,CAED,WAAA,CACD;;;;;;;AAYD,oBAAuB,UAAY,CACjC,UAAYC,kBAAkB7B,oBAAoBC,IAApB,CAAyB,gBAAA,CAAiBvD,GAAjB,CAAsBoF,SAAtB,CAAiC,CACtF,WAAA,CAAaC,KAAb,CAAoBZ,QAApB,CAA8BC,IAA9B,CAEA,2BAA2BjB,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAAShE,IAAT,CAAgBgE,SAAS5M,IAAjC,EACE,MAAA,CACEsO,UAAYA,WAAalF,MAAIoF,KAAJ,CAAUC,UAAUvF,GAAV,CAAV,CAAzB,CAEAqE,QAAU,CACRrE,IAAKoF,SADG,CAERN,QAASU,WAAS,EAAT,CAAaC,eAAb,CAFD,CAGRC,QAASC,aAHD;;AAMRC,SAAU,IANF;AAQRC,IAAK,IARG;AAURC,KAAM,IAVE;AAYRC,mBAAoB,IAZZ,CAAV,CAcArC,SAAS5M,IAAT,CAAgB,CAAhB,CACA,WAAWuN,OAAJ,CAAP,CAEF,MAAA,CACEgB,MAAQ3B,SAASsC,IAAjB,CACAvB,SAAWY,MAAMZ,QAAjB,CACAC,KAAOW,MAAMX,IAAb,CACAhB,SAAShE,IAAT,CAAgB,CAAhB,CAEAuG,iBAAiBxB,QAAjB,EACA,gBAAgByB,MAAT,CAAgB,QAAhB,CAA0B,CAC/BxB,KAAMA,IADyB,CAE/BD,SAAUA,QAFqB,CAA1B,CAAP,CAKF,OAAA,CACEf,SAAShE,IAAT,CAAgB,EAAhB,CACAgE,SAASyC,EAAT,CAAczC,SAAS,OAAT,E
AAkB,CAAlB,CAAd,CACA,gBAAgBwC,MAAT,CAAgB,QAAhB,CAA0BE,OAAOnC,MAAjC,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgBJ,IAAT,EAAP,CAxCJ,CA0CD,CACF,CA7CM,CA6CJwC,OA7CI,CA6CK,IA7CL,CA6CW,CAAC,CAAC,CAAD,CAAI,EAAJ,CAAD,CA7CX,CAAP,CA8CD,CAjD6B,CAAlB,CAAZ,CAmDA,sBAAA,CAAuBC,GAAvB,CAA4BC,GAA5B,CAAiC,CAC/B,aAAaC,KAAN,CAAY,IAAZ,CAAkBhD,SAAlB,CAAP,CACD,CAED,oBAAA,CACD,CAzDqB,EAAtB,CA2DA,wBAAA,CAAyBjN,CAAzB,CAA4BkQ,OAA5B,CAAkCC,EAAlC,CAAsC,CACpCnQ,EAAE,QAAUkQ,OAAV,CAAiB,GAAnB,EAAwBhQ,IAAxB,CAA6B,SAAUsJ,CAAV,CAAa3I,IAAb,CAAmB,CAC9C,UAAYb,EAAEa,IAAF,CAAZ,CAEA,UAAYE,MAAM8B,IAAN,CAAWqN,OAAX,CAAZ,CACAnP,MAAM8B,IAAN,CAAWsN,EAAX,CAAelF,KAAf,EACAlK,MAAMgC,UAAN,CAAiBmN,OAAjB,EACD,CAND,EAQA,QAAA,CACD;;;;;;AASD,0BAAA,CAA2BlQ,CAA3B,CAA8B,CAC5BA,EAAIoQ,gBAAgBpQ,CAAhB,CAAmB,SAAnB,CAA8B,OAA9B,CAAJ,CACAA,EAAIoQ,gBAAgBpQ,CAAhB,CAAmB,UAAnB,CAA+B,MAA/B,CAAJ,CACA,QAAA,CACD;AAGD,gBAAgB,UAAA,CAAW,gCAAX,CAA6C,GAA7C,CAAhB;;AAIA,iBAAiB,qBAAjB,CAEA,qBAAqB,CAAC,wCAAD,CAA2C,uCAA3C,CAAoF,qCAApF,CAA2H,oCAA3H,CAArB;AAGA,wBAAwB,CAAC,OAAD,CAAU,QAAV,CAAoB,UAApB,CAAgC,MAAhC,CAAwC,OAAxC,CAAiD,IAAjD,CAAuD,OAAvD,CAAgE,QAAhE,CAA0E,QAA1E,CAAxB;AAGA,mBAAmB,CAAC,OAAD,CAAU,OAAV,CAAnB,CACA,4BAA4BzB,eAAaE,GAAb,CAAiB,SAAUC,QAAV,CAAoB,CAC/D,MAAO,IAAMA,QAAN,CAAiB,GAAxB,CACD,CAF2B,CAA5B,CAGA,uBAAuBH,eAAaK,IAAb,CAAkB,GAAlB,CAAvB,CACA,sBAAsB,CAAC,KAAD,CAAQ,QAAR,CAAkB,MAAlB,CAA0B,OAA1B,CAAmC,IAAnC,CAAyC,KAAzC,CAAtB,CACA,yBAAyB,UAAA,CAAW,KAAOC,kBAAgBD,IAAhB,CAAqB,GAArB,CAAP,CAAmC,IAA9C,CAAoD,GAApD,CAAzB;AAGA,wBAAwB,CAAC,GAAD,CAAxB,CACA,6BAA6BG,oBAAkBN,GAAlB,CAAsB,SAAUQ,GAAV,CAAe,CAChE,WAAa,QAAb,CACD,CAF4B,EAE1BL,IAF0B,CAErB,GAFqB,CAA7B;AAKA,+BAA+B,CAAC,IAAD,CAAO,IAAP,CAAa,OAAb,CAAsB,KAAtB,CAA6B,QAA7B,CAAuC,MAAvC,EAA+CA,IAA/C,CAAoD,GAApD,CAA/B;AAGA,kBAAkB,CAAC,IAAD,CAAO,IAAP,CAAa,IAAb,CAAmB,IAAnB,CAAyB,IAAzB,CAAlB,CACA,sBAAsBO,cAAYP,IAAZ,CAAiB,GAAjB,CAAtB;;;;;AAQA,oCAAoC,CAAC,UAAD,CAAa,OAAb,CAAsB,QAAtB,CAAgC,SAAhC,CAA2C,SAA3C,CAAsD,KAAtD,CAA6D,gBAA7D,CAA+E,OAA/E,CAAwF,SAAxF,CAAmG,cAAnG,CAAmH,QAAnH,CAA6H,iBAA7H,CAAgJ,OAAhJ,CAAyJ,MAAzJ;AAEpC
,QAFoC,CAE1B,QAF0B,CAEhB,QAFgB,CAEN,OAFM;AAGpC,MAHoC,CAG5B,MAH4B,CAGpB,KAHoB,CAGb,UAHa,CAGD,OAHC,CAGQ,YAHR,CAGsB,UAHtB;AAIpC,2BAJoC;AAKpC,OALoC,CAK3B,eAL2B,CAKV,SALU,CAKC,QALD,CAKW,QALX,CAKqB,KALrB,CAK4B,OAL5B,CAKqC,UALrC,CAKiD,SALjD,CAK4D,UAL5D,CAKwE,SALxE,CAKmF,SALnF,CAK8F,OAL9F,CAApC;;;;;;;;;;;AAkBA,oCAAoC,CAAC,KAAD,CAAQ,SAAR,CAAmB,MAAnB,CAA2B,WAA3B,CAAwC,QAAxC,CAAkD,SAAlD,CAA6D,qBAA7D,CAAoF,QAApF;AACpC,OADoC,CAC3B,QAD2B,CACjB,OADiB,CACR,MADQ,CACA,MADA,CACQ,OADR,CACiB,QADjB,CAApC;;;AAMA,0BAA0B,CAAC,GAAD,CAAM,YAAN,CAAoB,IAApB,CAA0B,KAA1B,CAAiC,KAAjC,CAAwC,GAAxC,CAA6C,KAA7C,CAAoD,OAApD,EAA6DA,IAA7D,CAAkE,GAAlE,CAA1B;;;;;;;;;AAoBA,2BAA2B,CAAC,SAAD,CAAY,gBAAZ,CAA8B,iBAA9B,CAAiD,MAAjD,CAAyD,MAAzD,CAAiE,SAAjE,CAA4E,qBAA5E,CAAmG,OAAnG,CAA4G,QAA5G,CAAsH,MAAtH,CAA8H,QAA9H,CAAwI,MAAxI,CAAgJ,YAAhJ,CAA8J,WAA9J,CAA2K,MAA3K,CAAmL,OAAnL,CAA4L,MAA5L,CAAoM,UAApM;AAC3B,SAD2B,CAA3B;AAIA,wBAAwB,UAAA,CAAWoF,uBAAqBpF,IAArB,CAA0B,GAA1B,CAAX,CAA2C,GAA3C,CAAxB;;;;;AASA,2BAA2B,CAAC,OAAD,CAAU,QAAV,CAAoB,QAApB,CAA8B,KAA9B,CAAqC,UAArC,CAAiD,QAAjD,CAA2D,QAA3D,CAAqE,OAArE,CAA8E,MAA9E,CAAsF,OAAtF,CAA+F,SAA/F,CAA0G,YAA1G,CAAwH,SAAxH,CAAmI,MAAnI,CAA2I,QAA3I,CAAqJ,OAArJ,CAA8J,MAA9J,CAAsK,MAAtK,CAA8K,SAA9K,CAAyL,UAAzL;AAC3B,MAD2B,CACnB,QADmB,CACT,UADS,CACG,MADH,CACW,MADX,CACmB,MADnB,CAC2B,UAD3B;AAE3B,mBAF2B,CAEN,MAFM,CAEE,WAFF,CAEe,MAFf,CAEuB,UAFvB,CAEmC,OAFnC,CAE4C,MAF5C,CAEoD,OAFpD,CAE6D,UAF7D;AAG3B,OAH2B,CAGlB,KAHkB;AAI3B,SAJ2B,CAIhB,SAJgB,CAIL,cAJK;AAK3B,QAL2B,CAKjB,WALiB,CAKJ,OALI,CAKK,UALL,CAKiB,UALjB,CAK6B,MAL7B,CAKqC,SALrC,CAKgD,SALhD,CAK2D,OAL3D,CAKoE,KALpE,CAK2E,SAL3E,CAKsF,MALtF,CAK8F,OAL9F,CAKuG,QALvG,CAA3B;AAOA,wBAAwB,UAAA,CAAWuF,uBAAqBvF,IAArB,CAA0B,GAA1B,CAAX,CAA2C,GAA3C,CAAxB;AAGA,qBAAqB,wCAArB;;;;AAWA,cAAc,UAAA,CAAW,iBAAX,CAA8B,GAA9B,CAAd;;;;;;;;;;;;AAwBA,uBAAuB,CAAC,SAAD,CAAY,OAAZ,CAAqB,YAArB,CAAmC,MAAnC,CAA2C,IAA3C,CAAiD,QAAjD,CAA2D,QAA3D,CAAqE,SAArE,CAAgF,KAAhF,CAAuF,UAAvF,CAAmG,IAAnG,CAAyG,KAAzG,CAAgH,IAAhH,CAAsH,IAAtH,CAA4H,OAA5H,CAAqI,UAArI,CAAiJ,YAAjJ,CAA+J,QAA/J,CAAyK,QAAzK
,CAAmL,MAAnL,CAA2L,IAA3L,CAAiM,IAAjM,CAAuM,IAAvM,CAA6M,IAA7M,CAAmN,IAAnN,CAAyN,IAAzN,CAA+N,QAA/N,CAAyO,QAAzO,CAAmP,IAAnP,CAAyP,IAAzP,CAA+P,KAA/P,CAAsQ,QAAtQ,CAAgR,IAAhR,CAAsR,QAAtR,CAAgS,GAAhS,CAAqS,KAArS,CAA4S,UAA5S,CAAwT,SAAxT,CAAmU,OAAnU,CAA4U,OAA5U,CAAqV,UAArV,CAAiW,OAAjW,CAA0W,IAA1W,CAAgX,OAAhX,CAAyX,IAAzX,CAA+X,IAA/X,CAAqY,OAArY,CAAvB,CACA,0BAA0B,UAAA,CAAW,KAAOa,mBAAiBb,IAAjB,CAAsB,GAAtB,CAAP,CAAoC,IAA/C,CAAqD,GAArD,CAA1B;;;;AAMA,0BAA0BS,gCAA8BT,IAA9B,CAAmC,GAAnC,CAA1B,CACA,2BAA2B,UAAA,CAAWe,qBAAX,CAAgC,GAAhC,CAA3B,CAEA,0BAA0BL,gCAA8BV,IAA9B,CAAmC,GAAnC,CAA1B,CACA,2BAA2B,UAAA,CAAWiB,qBAAX,CAAgC,GAAhC,CAA3B,CAEA,kCAAA,CAAiCG,CAAjC,CAAoC;;;;;;;;;AAUlCA,EAAE,GAAF,EAAOoD,GAAP,CAAW,GAAX,EAAgBlD,IAAhB,CAAqB,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CAC1C,UAAYb,EAAEa,IAAF,CAAZ,CACA,YAAcE,MAAM8B,IAAN,CAAW,OAAX,CAAd,CACA,OAAS9B,MAAM8B,IAAN,CAAW,IAAX,CAAT,CACA,GAAI,CAACkC,EAAD,EAAO,CAACD,OAAZ,CAAqB,OAErB,eAAiB,CAACA,SAAW,EAAZ,EAAkB,GAAlB,EAAyBC,IAAM,EAA/B,CAAjB,CACA,GAAIjF,uBAAqBqB,IAArB,CAA0BwJ,UAA1B,CAAJ,CAA2C,CACzC,OACD,CAFD,QAEW/K,uBAAqBuB,IAArB,CAA0BwJ,UAA1B,CAAJ,CAA2C,CAChD5J,MAAMJ,MAAN,GACD,CACF,CAZD,EAcA,QAAA,CACD;;;;;;;AAWD,mBAAA,CAAoBX,CAApB,CAAuB,CACrB,eAAiB,KAAjB,CACAA,EAAE,IAAF,EAAQE,IAAR,CAAa,SAAUC,KAAV,CAAiBC,OAAjB,CAA0B,CACrC,aAAeJ,EAAEI,OAAF,CAAf,CACA,gBAAkBC,SAASE,IAAT,GAAgBC,GAAhB,CAAoB,CAApB,CAAlB,CAEA,GAAIF,aAAeA,YAAYG,OAAZ,CAAoBC,WAApB,KAAsC,IAAzD,CAA+D,CAC7DT,WAAa,IAAb,CACAI,SAASM,MAAT,GACD,CAHD,QAGWV,UAAJ,CAAgB,CACrBA,WAAa,KAAb;AAEAW,eAAaR,OAAb,CAAsBJ,CAAtB,CAAyB,IAAzB,EACD,CACF,CAZD,EAcA,QAAA,CACD;;;;;;;;;;AAaD,uBAAA,CAAsBa,IAAtB,CAA4Bb,CAA5B,CAA+B,CAC7B,OAASiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,KAA7E,CAEA,UAAYjN,EAAEa,IAAF,CAAZ,CAEA,GAAIC,EAAJ,CAAQ,CACN,YAAcD,KAAKI,WAAnB,CACA,MAAQjB,EAAE,SAAF,CAAR;;AAIA,MAAOgB,SAAW,EAAEA,QAAQP,OAAR,EAAmBf,sBAAoByB,IAApB,CAAyBH,QAAQP,OAAjC,CAArB,CAAlB,CAAmF,CACjF,gBAAkBO,QAAQC,WAA1B,CACAjB,EAAEgB,OAAF,EAAWI,QAAX,CAAoBF,CAApB,EACAF,QAAUC,WAAV,CACD,CAEDF,MAAMM,WAAN,CAAkBH,CAA
lB,EACAH,MAAMJ,MAAN,GACA,QAAA,CACD,CAED,QAAA,CACD,CAED,sBAAA,CAAqBX,CAArB,CAAwB,CACtBA,EAAE,KAAF,EAASE,IAAT,CAAc,SAAUC,KAAV,CAAiBoB,GAAjB,CAAsB,CAClC,SAAWvB,EAAEuB,GAAF,CAAX,CACA,gBAAkBC,KAAKE,QAAL,CAAcnC,qBAAd,EAAmCoC,MAAnC,GAA8C,CAAhE,CAEA,GAAIF,WAAJ,CAAiB,CACf4O,iBAAiB7O,IAAjB,CAAuBxB,CAAvB,CAA0B,GAA1B,EACD,CACF,CAPD,EASA,QAAA,CACD,CAED,uBAAA,CAAsBA,CAAtB,CAAyB,CACvBA,EAAE,MAAF,EAAUE,IAAV,CAAe,SAAUC,KAAV,CAAiB0B,IAAjB,CAAuB,CACpC,UAAY7B,EAAE6B,IAAF,CAAZ,CACA,gBAAkBC,MAAMC,OAAN,CAAc,QAAd,EAAwBJ,MAAxB,GAAmC,CAArD,CACA,GAAIF,WAAJ,CAAiB,CACf4O,iBAAiBvO,KAAjB,CAAwB9B,CAAxB,CAA2B,GAA3B,EACD,CACF,CAND,EAQA,QAAA,CACD;;;;;;;;;;;AAcD,+BAAA,CAAgCA,CAAhC,CAAmC,CACjCA,EAAIsQ,WAAWtQ,CAAX,CAAJ,CACAA,EAAIsB,cAAYtB,CAAZ,CAAJ,CACAA,EAAI4B,eAAa5B,CAAb,CAAJ,CAEA,QAAA,CACD,CAED,yBAAA,CAA0Be,KAA1B,CAAiCf,CAAjC,CAAoC,CAClC,QAAUiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,GAA9E,CAEA,SAAWlM,MAAMP,GAAN,CAAU,CAAV,CAAX,CACA,GAAI,CAACK,IAAL,CAAW,CACT,QAAA,CACD,CACD,UAAYqB,WAASrB,IAAT,GAAkB,EAA9B,CAEA,iBAAmB0P,mBAAiBtO,KAAjB,EAAwBxD,GAAxB,CAA4B,SAAU2D,GAAV,CAAe,CAC5D,WAAa,GAAN,CAAYH,MAAMG,GAAN,CAAnB,CACD,CAFkB,EAEhBxD,IAFgB,CAEX,GAFW,CAAnB,CAGA,SAAW,MAAX,CAEA,GAAIoB,EAAEsC,OAAN,CAAe;;;AAIbD,KAAOxB,KAAKJ,OAAL,CAAaC,WAAb,KAA+B,UAA/B,CAA4CK,MAAMwB,IAAN,EAA5C,CAA2DxB,MAAMsB,IAAN,EAAlE,CACD,CALD,IAKO,CACLA,KAAOtB,MAAMyB,QAAN,EAAP,CACD,CACDzB,MAAMM,WAAN,CAAkB,IAAMpC,GAAN,CAAY,GAAZ,CAAkBkD,YAAlB,CAAiC,GAAjC,CAAuCE,IAAvC,CAA8C,IAA9C,CAAqDpD,GAArD,CAA2D,GAA7E,EACA,QAAA,CACD,CAED,yBAAA,CAAwByD,IAAxB,CAA8B1C,CAA9B,CAAiC,CAC/B,WAAa4C,SAASF,KAAKG,IAAL,CAAU,QAAV,CAAT,CAA8B,EAA9B,CAAb,CACA,UAAYD,SAASF,KAAKG,IAAL,CAAU,OAAV,CAAT,CAA6B,EAA7B,GAAoC,EAAhD;;;AAKA,GAAI,CAACF,QAAU,EAAX,EAAiB,EAAjB,EAAuBG,MAAQ,EAAnC,CAAuC,CACrCJ,KAAK/B,MAAL,GACD,CAFD,QAEWgC,MAAJ,CAAY;;;AAIjBD,KAAKK,UAAL,CAAgB,QAAhB,EACD,CAED,QAAA,CACD;;AAID,wBAAA,CAAuBL,IAAvB,CAA6B1C,CAA7B,CAAgC,CAC9B,GAAI9B,YAAUiD,IAAV,CAAeuB,KAAKG,IAAL,CAAU,KAAV,CAAf,CAAJ,CAAsC,CACpCH,KAAK/B,MAAL,GACD,CAED,QAAA,CACD,CAED
,sBAAA,CAAqB4C,QAArB,CAA+BvD,CAA/B,CAAkC,CAChCuD,SAASC,IAAT,CAAc,KAAd,EAAqBtD,IAArB,CAA0B,SAAUC,KAAV,CAAiBqQ,GAAjB,CAAsB,CAC9C,SAAWxQ,EAAEwQ,GAAF,CAAX,CAEA/N,iBAAeC,IAAf,CAAqB1C,CAArB,EACAgD,gBAAcN,IAAd,CAAoB1C,CAApB,EACD,CALD,EAOA,QAAA,CACD,CAED,qBAAA,CAAoBkD,OAApB,CAA6BlD,CAA7B,CAAgCyJ,GAAhC,CAAqC,CACnC,SAAWwD,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAA/E,CAEA,GAAI9J,KAAKxB,MAAL,GAAgB,CAApB,CAAuB,CACrBwB,KAAO9E,gBAAP,CACD,CAED,GAAIoL,GAAJ,CAAS,CACP,eAAiBE,MAAIoF,KAAJ,CAAUtF,GAAV,CAAjB,CACIgH,SAAWC,WAAWD,QAD1B,CAEIhD,SAAWiD,WAAWjD,QAF1B,CAIAtK,KAAO,GAAGwN,MAAH,CAAUC,qBAAmBzN,IAAnB,CAAV,CAAoC,CAAC,gBAAkBsN,QAAlB,CAA6B,IAA7B,CAAoChD,QAApC,CAA+C,IAAhD,CAApC,CAAP,CACD,CAEDzN,EAAEmD,KAAKvE,IAAL,CAAU,GAAV,CAAF,CAAkBsE,OAAlB,EAA2B2N,QAA3B,CAAoCzS,YAApC,EAEA,QAAA,CACD,CAED,wBAAA,CAAuB8E,OAAvB,CAAgClD,CAAhC,CAAmC,CACjC,SAAWiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAA/E,CAEA,GAAI9J,KAAKxB,MAAL,GAAgB,CAApB,CAAuB,CACrBwB,KAAO7E,mBAAP,CACD;;AAID0B,EAAEmD,KAAKvE,IAAL,CAAU,GAAV,CAAF,CAAkBsE,OAAlB,EAA2BE,GAA3B,CAA+B,IAAMhF,YAArC,EAAiDuC,MAAjD;AAGAX,EAAE,IAAM5B,YAAR,CAAoB8E,OAApB,EAA6BG,WAA7B,CAAyCjF,YAAzC,EAEA,QAAA,CACD;;;AAMD,sBAAA,CAAuB8E,OAAvB,CAAgClD,CAAhC,CAAmC,CACjC,WAAaA,EAAE,IAAF,CAAQkD,OAAR,CAAb,CAEA,GAAI4N,OAAOnP,MAAP,CAAgB,CAApB,CAAuB,CACrBmP,OAAO5Q,IAAP,CAAY,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CACjC,SAASA,IAAF,EAAQF,MAAR,EAAP,CACD,CAFD,EAGD,CAJD,IAIO,CACLmQ,OAAO5Q,IAAP,CAAY,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CACjCwP,iBAAiBrQ,EAAEa,IAAF,CAAjB,CAA0Bb,CAA1B,CAA6B,IAA7B,EACD,CAFD,EAGD,CAED,QAAA,CACD,CAED,gCAAA,CAA+BuD,QAA/B,CAAyC,CACvCA,SAASC,IAAT,CAAc,GAAd,EAAmBtD,IAAnB,CAAwB,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CAC7C,UAAYqB,WAASrB,IAAT,CAAZ,CAEAwK,WAASxK,IAAT,CAAe0P,mBAAiBtO,KAAjB,EAAwBwB,MAAxB,CAA+B,SAAUC,GAAV,CAAeb,IAAf,CAAqB,CACjE,GAAI/D,qBAAmBqC,IAAnB,CAAwB0B,IAAxB,CAAJ,CAAmC,CACjC,kBAAgB,EAAT,CAAaa,GAAb,CAAkBqN,kBAAgB,EAAhB,CAAoBlO,IAApB,CAA0BZ,MAAMY,IAAN,CAA1B,CAAlB,CAAP,CACD,CAED,UAAA,CACD
,CANc,CAMZ,EANY,CAAf,EAOD,CAVD,EAYA,eAAA,CACD;;;;;;AASD,2BAAA,CAA4BU,QAA5B,CAAsC;;;AAIpC,+BAA6BA,SAAS+C,MAAT,GAAkB3E,MAAlB,CAA2B4B,SAAS+C,MAAT,EAA3B,CAA+C/C,QAArE,CAAP,CACD,CAED,sBAAA,CAAqBA,QAArB,CAA+BvD,CAA/B,CAAkC,CAChCuD,SAASC,IAAT,CAAc,GAAd,EAAmBtD,IAAnB,CAAwB,SAAUC,KAAV,CAAiBe,CAAjB,CAAoB,CAC1C,OAASlB,EAAEkB,CAAF,CAAT,CACA,GAAI8P,GAAGxN,IAAH,CAAQ,aAAR,EAAuB7B,MAAvB,GAAkC,CAAlC,EAAuCqP,GAAGzO,IAAH,GAAUuD,IAAV,KAAqB,EAAhE,CAAoEkL,GAAGrQ,MAAH,GACrE,CAHD,EAKA,QAAA,CACD;;;;;AAQD,sCAAsC,CAAC,UAAD,CAAa,OAAb,CAAsB,QAAtB,CAAgC,SAAhC,CAA2C,SAA3C,CAAsD,KAAtD,CAA6D,gBAA7D,CAA+E,OAA/E,CAAwF,SAAxF,CAAmG,cAAnG,CAAmH,QAAnH,CAA6H,iBAA7H,CAAgJ,OAAhJ,CAAyJ,MAAzJ,CAAiK,MAAjK,CAAyK,QAAzK,CAAmL,QAAnL,CAA6L,QAA7L,CAAuM,OAAvM;AACtC,MADsC,CAC9B,MAD8B,CACtB,KADsB,CACf,OADe,CACN,YADM,CACQ,UADR;AAEtC,2BAFsC;AAGtC,OAHsC,CAG7B,eAH6B,CAGZ,SAHY,CAGD,QAHC,CAGS,QAHT,CAGmB,KAHnB,CAG0B,OAH1B,CAGmC,UAHnC,CAG+C,SAH/C,CAG0D,UAH1D,CAGsE,SAHtE,CAGiF,OAHjF,CAAtC;;;;;;;;;;;AAgBA,sCAAsC,CAAC,KAAD,CAAQ,SAAR,CAAmB,MAAnB,CAA2B,WAA3B,CAAwC,QAAxC,CAAkD,SAAlD,CAA6D,qBAA7D,CAAoF,QAApF;AACtC,OADsC,CAC7B,QAD6B,CACnB,OADmB,CACV,MADU,CACF,MADE,CACM,OADN,CACe,QADf,CAAtC;;;AAMA,4BAA4B,CAAC,GAAD,CAAM,YAAN,CAAoB,IAApB,CAA0B,KAA1B,CAAiC,KAAjC,CAAwC,GAAxC,CAA6C,KAA7C,CAAoD,OAApD,EAA6D/B,IAA7D,CAAkE,GAAlE,CAA5B;;AAIA,+BAA+B,CAAC,IAAD,CAAO,GAAP,CAAY,GAAZ,CAAiB,OAAjB,CAA0B,IAA1B,CAAgC,MAAhC,CAAwC,MAAxC,CAAgD,UAAhD,CAA4D,OAA5D,CAAqE,KAArE,CAA4E,MAA5E,CAAoF,MAApF,CAA/B,CAEA,kCAAkC,UAAA,CAAW,KAAOqS,2BAAyBrS,IAAzB,CAA8B,GAA9B,CAAP,CAA4C,IAAvD,CAA6D,GAA7D,CAAlC;;;AAKA,gCAAgC,CAAC,CAAC,SAAD,CAAY,gBAAZ,CAAD,CAAgC,CAAC,OAAD,CAAU,gBAAV,CAAhC,CAA6D,CAAC,QAAD,CAAW,gBAAX,CAA7D,CAA2F,CAAC,OAAD,CAAU,WAAV,CAA3F,CAAmH,CAAC,OAAD,CAAU,YAAV,CAAnH,CAA4I,CAAC,OAAD,CAAU,YAAV,CAA5I,CAAhC,CAEA,oBAAoB,CAAC,QAAD,CAAW,OAAX,CAAoB,OAApB,CAA6B,SAA7B,CAApB,CACA,uBAAuB,UAAA,CAAWsS,gBAActS,IAAd,CAAmB,GAAnB,CAAX,CAAoC,GAApC,CAAvB;;;;AAMA,6BAA6B,CAAC,SAAD,CAAY,gBAAZ,CAA8B,iBAA9B,CAAiD,MAAjD,CAAyD,MAAzD,CAAiE,SAAjE,CAA4E,qBAA5E,CAAmG,OAAnG,CAA4G,QAA5G,CAAsH,
MAAtH,CAA8H,QAA9H,CAAwI,MAAxI,CAAgJ,YAAhJ,CAA8J,WAA9J,CAA2K,MAA3K,CAAmL,OAAnL,CAA4L,MAA5L,CAAoM,UAApM;AAC7B,SAD6B,CAA7B;AAIA,0BAA0B,UAAA,CAAWuS,yBAAuBvS,IAAvB,CAA4B,GAA5B,CAAX,CAA6C,GAA7C,CAA1B;AAGA,0BAA0B,UAAA,CAAW,qBAAX,CAAkC,GAAlC,CAA1B;;;;AAMA,6BAA6B,CAAC,OAAD,CAAU,QAAV,CAAoB,QAApB,CAA8B,KAA9B,CAAqC,UAArC,CAAiD,QAAjD,CAA2D,QAA3D,CAAqE,OAArE,CAA8E,MAA9E,CAAsF,OAAtF,CAA+F,SAA/F,CAA0G,YAA1G,CAAwH,SAAxH,CAAmI,MAAnI,CAA2I,QAA3I,CAAqJ,OAArJ,CAA8J,MAA9J,CAAsK,MAAtK,CAA8K,SAA9K,CAAyL,UAAzL;AAC7B,MAD6B,CACrB,QADqB,CACX,UADW,CACC,MADD,CACS,MADT,CACiB,MADjB,CACyB,UADzB;AAE7B,mBAF6B,CAER,MAFQ,CAEA,WAFA,CAEa,MAFb,CAEqB,UAFrB,CAEiC,OAFjC,CAE0C,MAF1C,CAEkD,OAFlD,CAE2D,UAF3D;AAG7B,OAH6B,CAGpB,KAHoB;AAI7B,SAJ6B,CAIlB,SAJkB,CAIP,cAJO;AAK7B,QAL6B,CAKnB,WALmB,CAKN,OALM,CAKG,UALH,CAKe,UALf,CAK2B,MAL3B,CAKmC,SALnC,CAK8C,SAL9C,CAKyD,OALzD,CAKkE,KALlE,CAKyE,SALzE,CAKoF,MALpF,CAK4F,OAL5F,CAKqG,QALrG,CAA7B;AAOA,0BAA0B,UAAA,CAAWwS,yBAAuBxS,IAAvB,CAA4B,GAA5B,CAAX,CAA6C,GAA7C,CAA1B;;;;;;;;;AAoBA,4BAA4ByS,kCAAgCzS,IAAhC,CAAqC,GAArC,CAA5B,CAGA,4BAA4B0S,kCAAgC1S,IAAhC,CAAqC,GAArC,CAA5B,CAKA,6BAA6B,UAAA,CAAW,mBAAX,CAAgC,GAAhC,CAA7B,CACA,2BAA2B,UAAA,CAAW,4BAAX,CAAyC,GAAzC,CAA3B,CACA,iBAAiB,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAjB;AAGA,oBAAA,CAAmBiC,IAAnB,CAAyB,CACvB,YAAcA,KAAKgC,IAAL,CAAU,OAAV,CAAd,CACA,OAAShC,KAAKgC,IAAL,CAAU,IAAV,CAAT,CACA,UAAY,CAAZ,CAEA,GAAIkC,EAAJ,CAAQ;AAEN,GAAIwM,sBAAoBpQ,IAApB,CAAyB4D,EAAzB,CAAJ,CAAkC,CAChCC,OAAS,EAAT,CACD,CACD,GAAIwM,sBAAoBrQ,IAApB,CAAyB4D,EAAzB,CAAJ,CAAkC,CAChCC,OAAS,EAAT,CACD,CACF,CAED,GAAIF,OAAJ,CAAa,CACX,GAAIE,QAAU,CAAd,CAAiB;;AAGf,GAAIuM,sBAAoBpQ,IAApB,CAAyB2D,OAAzB,CAAJ,CAAuC,CACrCE,OAAS,EAAT,CACD,CACD,GAAIwM,sBAAoBrQ,IAApB,CAAyB2D,OAAzB,CAAJ,CAAuC,CACrCE,OAAS,EAAT,CACD,CACF;;;AAKD,GAAIyM,mBAAiBtQ,IAAjB,CAAsB2D,OAAtB,CAAJ,CAAoC,CAClCE,OAAS,EAAT,CACD;;;;AAMD,GAAI0M,sBAAoBvQ,IAApB,CAAyB2D,OAAzB,CAAJ,CAAuC,CACrCE,OAAS,EAAT,CACD,CACF,CAED,YAAA,CACD;;;AAKD,mBAAA,CAAkBjE,KAAlB,CAAyB;AAEvB,kBAAkBA,MAAM8B,IAAN,CAAW,OAAX,CAAX,GAAmC,IAA1C,CACD;AAGD,sBAAA,CAAqBN,IAAr
B,CAA2B,CACzB,MAAO,CAACA,KAAK6C,KAAL,CAAW,IAAX,GAAoB,EAArB,EAAyBzD,MAAhC,CACD,CAED,YAAY,UAAA,CAAW,WAAX,CAAwB,GAAxB,CAAZ,CAEA,sBAAA,CAAqB4D,UAArB,CAAiC,CAC/B,YAAc0H,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,GAAlF,CAEA,WAAa1H,WAAa,EAA1B,CAEA,GAAIC,OAAS,CAAb,CAAgB,CACd,gBAAkB,MAAlB;;;;;AAOA,GAAIH,QAAMlE,IAAN,CAAWV,OAAX,CAAJ,CAAyB,CACvBgF,YAAcD,OAAS,CAAvB,CACD,CAFD,IAEO,CACLC,YAAcD,OAAS,IAAvB,CACD,CAED,YAAYG,GAAL,CAASD,KAAKE,GAAL,CAASH,WAAT,CAAsB,CAAtB,CAAT,CAAmC,CAAnC,CAAP,CACD,CAED,QAAA,CACD;;AAID,0BAAA,CAA2B5E,IAA3B,CAAiC,CAC/B,UAAY,CAAZ,CACA,SAAWA,KAAK0B,IAAL,GAAYuD,IAAZ,EAAX,CACA,eAAiBvD,KAAKZ,MAAtB;AAGA,GAAI4D,WAAa,EAAjB,CAAqB,CACnB,QAAA,CACD;AAGDP,OAASG,cAAY5C,IAAZ,CAAT;;AAIAyC,OAASM,cAAYC,UAAZ,CAAT;;;;AAMA,GAAIhD,KAAKwD,KAAL,CAAW,CAAC,CAAZ,IAAmB,GAAvB,CAA4B,CAC1Bf,OAAS,CAAT,CACD,CAED,YAAA,CACD,CAED,mBAAA,CAAkBjE,KAAlB,CAAyBf,CAAzB,CAA4BgF,KAA5B,CAAmC,CACjCjE,MAAM8B,IAAN,CAAW,OAAX,CAAoBmC,KAApB,EACA,YAAA,CACD,CAED,oBAAA,CAAqBjE,KAArB,CAA4Bf,CAA5B,CAA+BkG,MAA/B,CAAuC,CACrC,GAAI,CACF,UAAYyL,kBAAkB5Q,KAAlB,CAAyBf,CAAzB,EAA8BkG,MAA1C,CACAF,WAASjF,KAAT,CAAgBf,CAAhB,CAAmBgF,KAAnB,EACD,CAAC,MAAOoB,CAAP,CAAU;CAIZ,YAAA,CACD;AAGD,uBAAA,CAAwBvF,IAAxB,CAA8Bb,CAA9B,CAAiCgF,KAAjC,CAAwC,CACtC,WAAanE,KAAKyF,MAAL,EAAb,CACA,GAAIA,MAAJ,CAAY,CACVsL,YAAYtL,MAAZ,CAAoBtG,CAApB,CAAuBgF,MAAQ,IAA/B,EACD,CAED,WAAA,CACD;;;AAKD,0BAAA,CAA2BjE,KAA3B,CAAkCf,CAAlC,CAAqC,CACnC,gBAAkBiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,IAAtF,CAEA,UAAYhI,WAASlE,KAAT,CAAZ,CAEA,GAAIiE,KAAJ,CAAW,CACT,YAAA,CACD,CAEDA,MAAQ6M,aAAa9Q,KAAb,CAAR,CAEA,GAAIwF,WAAJ,CAAiB,CACfvB,OAASH,YAAU9D,KAAV,CAAT,CACD,CAED+Q,eAAe/Q,KAAf,CAAsBf,CAAtB,CAAyBgF,KAAzB,EAEA,YAAA,CACD;;AAID,qBAAA,CAAsBjE,KAAtB,CAA6B,CAC3B,eAAiBA,MAAMP,GAAN,CAAU,CAAV,CAAjB,CACIC,QAAUsR,WAAWtR,OADzB;;;AAQA,GAAIuR,yBAAuB7Q,IAAvB,CAA4BV,OAA5B,CAAJ,CAA0C,CACxC,yBAAyBM,KAAlB,CAAP,CACD,CAFD,QAEWN,QAAQC,WAAR,KAA0B,KAA9B,CAAqC,CAC1C,QAAA,CACD,CAFM,QAEIuR,uBAAqB9Q,IAArB,CAA0BV,
OAA1B,CAAJ,CAAwC,CAC7C,QAAA,CACD,CAFM,QAEIyR,aAAW/Q,IAAX,CAAgBV,OAAhB,CAAJ,CAA8B,CACnC,MAAO,CAAC,CAAR,CACD,CAFM,QAEIA,QAAQC,WAAR,KAA0B,IAA9B,CAAoC,CACzC,MAAO,CAAC,CAAR,CACD,CAED,QAAA,CACD,CAED,yBAAA,CAAwBK,KAAxB,CAA+Bf,CAA/B,CAAkC,CAChC,GAAIe,MAAMP,GAAN,CAAU,CAAV,CAAJ,CAAkB,CAChB,eAAiBO,MAAMP,GAAN,CAAU,CAAV,CAAjB,CACIC,QAAUsR,WAAWtR,OADzB,CAGA,GAAIA,UAAY,MAAhB,CAAwB;AAEtB4P,iBAAiBtP,KAAjB,CAAwBf,CAAxB,CAA2B,KAA3B,EACD,CACF,CACF,CAED,qBAAA,CAAoBe,KAApB,CAA2Bf,CAA3B,CAA8BgF,KAA9B,CAAqC,CACnC,GAAIjE,KAAJ,CAAW,CACToR,iBAAepR,KAAf,CAAsBf,CAAtB,EACA4R,YAAY7Q,KAAZ,CAAmBf,CAAnB,CAAsBgF,KAAtB,EACD,CACF,CAED,kBAAA,CAAiBhF,CAAjB,CAAoBuG,WAApB,CAAiC,CAC/BvG,EAAE,QAAF,EAAYoD,GAAZ,CAAgB,SAAhB,EAA2BlD,IAA3B,CAAgC,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB;;AAGrD,UAAYb,EAAEa,IAAF,CAAZ,CACAE,MAAQiF,WAASjF,KAAT,CAAgBf,CAAhB,CAAmB2R,kBAAkB5Q,KAAlB,CAAyBf,CAAzB,CAA4BuG,WAA5B,CAAnB,CAAR,CAEA,YAAcxF,MAAMuF,MAAN,EAAd,CACA,aAAeuL,aAAa9Q,KAAb,CAAf,CAEA0F,aAAWE,OAAX,CAAoB3G,CAApB,CAAuB4G,QAAvB,CAAiCL,WAAjC,EACA,GAAII,OAAJ,CAAa;;AAGXF,aAAWE,QAAQL,MAAR,EAAX,CAA6BtG,CAA7B,CAAgC4G,SAAW,CAA3C,CAA8CL,WAA9C,EACD,CACF,CAfD,EAiBA,QAAA,CACD;;AAID,wBAAA,CAAyBvG,CAAzB,CAA4B,CAC1B,gBAAkBiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,IAAtF;;AAIAmF,4BAA0BtI,OAA1B,CAAkC,SAAU0D,IAAV,CAAgB,CAChD,UAAY6E,iBAAe7E,IAAf,CAAqB,CAArB,CAAZ,CACI8E,eAAiBC,MAAM,CAAN,CADrB,CAEIC,cAAgBD,MAAM,CAAN,CAFpB,CAIAvS,EAAEsS,eAAiB,GAAjB,CAAuBE,aAAzB,EAAwCtS,IAAxC,CAA6C,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CAClE+Q,YAAY5R,EAAEa,IAAF,EAAQyF,MAAR,CAAegM,cAAf,CAAZ,CAA4CtS,CAA5C,CAA+C,EAA/C,EACD,CAFD,EAGD,CARD;;;;;AAeA0G,UAAQ1G,CAAR,CAAWuG,WAAX,EACAG,UAAQ1G,CAAR,CAAWuG,WAAX,EAEA,QAAA,CACD,CAED,mBAAmB,SAAnB,CAEA,0BAAA,CAAyBhE,IAAzB,CAA+B,CAC7B,YAAYwE,OAAL,CAAaF,cAAb,CAA2B,GAA3B,EAAgCf,IAAhC,EAAP,CACD;;;;;AAOD,yBAAA,CAAwB2D,GAAxB,CAA6BgJ,SAA7B,CAAwC,CACtC,YAAcA,UAAUjP,IAAV,CAAe,SAAUkP,EAAV,CAAc,CACzC,UAAUvR,IAAH,CAAQsI,GAAR,CAAP,CACD,CAFa,CAAd;AAIA,GAAIkJ,OAAJ,CAAa,CACX,eAAeC,IAAR,CAAanJ,GAAb,EAAkB,CAAlB,CAAP,CACD,CA
ED,WAAA,CACD;;;;;;;;;;;;;;;;AAkBD,sBAAsB,UAAA,CAAW,0EAAX,CAAuF,GAAvF,CAAtB,CAEA,mBAAmB,QAAnB,CAEA,kBAAkB,WAAlB,CACA,kBAAkB,WAAlB,CAEA,yBAAA,CAAwBA,GAAxB,CAA6B,CAC3B,YAAcA,IAAIrE,KAAJ,CAAU4B,iBAAV,CAAd,CACA,GAAI,CAAC6L,OAAL,CAAc,WAAA,CAEd,YAAcjQ,SAASiQ,QAAQ,CAAR,CAAT,CAAqB,EAArB,CAAd;;AAIA,eAAiB,GAAV,CAAgBC,OAAhB,CAA0B,IAAjC,CACD,CAED,uBAAA,CAAsBrJ,GAAtB,CAA2B,CACzB,WAAWsJ,KAAJ,CAAU,GAAV,EAAe,CAAf,EAAkBhM,OAAlB,CAA0B,KAA1B,CAAiC,EAAjC,CAAP,CACD,CAED,wBAAA,CAAuBM,OAAvB,CAAgClH,KAAhC,CAAuCmH,sBAAvC,CAA+D,CAC7D,gBAAkB,IAAlB;;AAIA,GAAInH,MAAQ,CAAR,EAAagH,cAAYhG,IAAZ,CAAiBkG,OAAjB,CAAb,EAA0CA,QAAQ1F,MAAR,CAAiB,CAA/D,CAAkE,CAChE4F,YAAc,IAAd,CACD;;AAID,GAAIpH,QAAU,CAAV,EAAekH,QAAQ3G,WAAR,KAA0B,OAA7C,CAAsD,CACpD6G,YAAc,KAAd,CACD;;AAID,GAAIpH,MAAQ,CAAR,EAAakH,QAAQ1F,MAAR,CAAiB,CAA9B,EAAmC,CAAC2F,sBAAxC,CAAgE,CAC9DC,YAAc,KAAd,CACD,CAED,kBAAA,CACD;;;AAKD,yBAAA,CAAwBkC,GAAxB,CAA6BuJ,MAA7B,CAAqC,CACnC,cAAgBA,QAAUrJ,MAAIoF,KAAJ,CAAUtF,GAAV,CAA1B,CACA,aAAeoF,UAAU4B,QAAzB,CACIwC,KAAOpE,UAAUoE,IADrB,CAEIC,KAAOrE,UAAUqE,IAFrB,CAKA,2BAA6B,KAA7B,CACA,oBAAsBA,KAAKH,KAAL,CAAW,GAAX,EAAgBI,OAAhB,GAA0B1P,MAA1B,CAAiC,SAAUC,GAAV,CAAe0P,UAAf,CAA2BjT,KAA3B,CAAkC,CACvF,YAAciT,UAAd;AAGA,GAAI/L,QAAQuD,QAAR,CAAiB,GAAjB,CAAJ,CAA2B,CACzB,mBAAqBvD,QAAQ0L,KAAR,CAAc,GAAd,CAArB,CACIM,gBAAkBhB,iBAAeiB,cAAf,CAA+B,CAA/B,CADtB,CAEIC,gBAAkBF,gBAAgB,CAAhB,CAFtB,CAGIG,QAAUH,gBAAgB,CAAhB,CAHd,CAKA,GAAInM,cAAY/F,IAAZ,CAAiBqS,OAAjB,CAAJ,CAA+B,CAC7BnM,QAAUkM,eAAV,CACD,CACF;;AAID,GAAIvM,kBAAgB7F,IAAhB,CAAqBkG,OAArB,GAAiClH,MAAQ,CAA7C,CAAgD,CAC9CkH,QAAUA,QAAQN,OAAR,CAAgBC,iBAAhB,CAAiC,EAAjC,CAAV,CACD;;;;AAMD,GAAI7G,QAAU,CAAd,CAAiB,CACfmH,uBAAyBL,eAAa9F,IAAb,CAAkBkG,OAAlB,CAAzB,CACD;AAGD,GAAID,gBAAcC,OAAd,CAAuBlH,KAAvB,CAA8BmH,sBAA9B,CAAJ,CAA2D,CACzD5D,IAAI+P,IAAJ,CAASpM,OAAT,EACD,CAED,UAAA,CACD,CAnCqB,CAmCnB,EAnCmB,CAAtB,CAqCA,gBAAkB,IAAX,CAAkB4L,IAAlB,CAAyBS,gBAAgBP,OAAhB,GAA0BvU,IAA1B,CAA+B,GAA/B,CAAhC,CACD;;AAID,sBAAsB,UAAA,CAAW,QAAX,CAAtB,CACA,yBAAA,CAAwB2D,IAAxB,CAA8B,CAC5B,yBAAuBpB,IAAhB,CAAqBoB,IAArB,CAAP,CACD,
CAED,yBAAA,CAAwBqG,OAAxB,CAAiC,CACnB,UAAYqE,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAAhF,CAEA,eAAenH,IAAR,GAAeiN,KAAf,CAAqB,KAArB,EAA4BhN,KAA5B,CAAkC,CAAlC,CAAqC4N,KAArC,EAA4C/U,IAA5C,CAAiD,GAAjD,CAAP,CACb;;;;;AAOD,wBAAA,CAAuB+I,UAAvB,CAAmCC,QAAnC,CAA6C5H,CAA7C,CAAgD,CAC9C,GAAI,CAAC2H,WAAWrB,MAAX,GAAoB3E,MAAzB,CAAiC,CAC/B,iBAAA,CACD,CAED,0BAA4B+D,KAAKE,GAAL,CAAS,EAAT,CAAagC,SAAW,IAAxB,CAA5B,CACA,gBAAkB5H,EAAE,aAAF,CAAlB,CAEA2H,WAAWrB,MAAX,GAAoB5E,QAApB,GAA+BxB,IAA/B,CAAoC,SAAUC,KAAV,CAAiBa,OAAjB,CAA0B,CAC5D,aAAehB,EAAEgB,OAAF,CAAf;AAEA,GAAI4S,8BAA4BzS,IAA5B,CAAiCH,QAAQP,OAAzC,CAAJ,CAAuD,CACrD,WAAA,CACD,CAED,iBAAmBwE,WAAS8C,QAAT,CAAnB,CACA,GAAIC,YAAJ,CAAkB,CAChB,GAAID,SAASvH,GAAT,CAAa,CAAb,IAAoBmH,WAAWnH,GAAX,CAAe,CAAf,CAAxB,CAA2C,CACzCsH,YAAYG,MAAZ,CAAmBF,QAAnB,EACD,CAFD,IAEO,CACL,iBAAmB,CAAnB,CACA,YAAcK,cAAYL,QAAZ,CAAd;;AAIA,GAAII,QAAU,IAAd,CAAoB,CAClBD,cAAgB,EAAhB,CACD;;AAID,GAAIC,SAAW,GAAf,CAAoB,CAClBD,cAAgB,EAAhB,CACD;;AAID,GAAIH,SAASlF,IAAT,CAAc,OAAd,IAA2B8E,WAAW9E,IAAX,CAAgB,OAAhB,CAA/B,CAAyD,CACvDqF,cAAgBN,SAAW,GAA3B,CACD,CAED,aAAeI,aAAeE,YAA9B,CAEA,GAAIG,UAAYR,qBAAhB,CAAuC,CACrC,mBAAmBI,MAAZ,CAAmBF,QAAnB,CAAP,CACD,CAFD,QAEW/G,QAAQP,OAAR,GAAoB,GAAxB,CAA6B,CAClC,mBAAqBsH,SAASxF,IAAT,EAArB,CACA,yBAA2BgD,aAAW+C,cAAX,CAA3B,CAEA,GAAIC,qBAAuB,EAAvB,EAA6BJ,QAAU,IAA3C,CAAiD,CAC/C,mBAAmBF,MAAZ,CAAmBF,QAAnB,CAAP,CACD,CAFD,QAEWQ,sBAAwB,EAAxB,EAA8BJ,UAAY,CAA1C,EAA+CV,iBAAea,cAAf,CAAnD,CAAmF,CACxF,mBAAmBL,MAAZ,CAAmBF,QAAnB,CAAP,CACD,CACF,CACF,CACF,CAED,WAAA,CACD,CAnDD,EAqDA,GAAID,YAAYpG,QAAZ,GAAuBC,MAAvB,GAAkC,CAAlC,EAAuCmG,YAAYpG,QAAZ,GAAuB8G,KAAvB,GAA+BhI,GAA/B,CAAmC,CAAnC,IAA0CmH,WAAWnH,GAAX,CAAe,CAAf,CAArF,CAAwG,CACtG,iBAAA,CACD,CAED,kBAAA,CACD;;AAID,4BAAA,CAA6BR,CAA7B,CAAgC,CAC9B,eAAiB,MAAjB,CACA,aAAe,CAAf,CAEAA,EAAE,SAAF,EAAaE,IAAb,CAAkB,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB;AAEvC,GAAI+S,8BAA4BzS,IAA5B,CAAiCN,KAAKJ,OAAtC,CAAJ,CAAoD,CAClD,OACD,CAED,UAAYT,EAAEa,IAAF,CAAZ,CACA,UAAYoE,WAASlE,KAAT,CAAZ,CAEA,GAAIiE,MAAQ4C,QAAZ,CAAsB,
CACpBA,SAAW5C,KAAX,CACA2C,WAAa5G,KAAb,CACD,CACF,CAbD;;AAiBA,GAAI,CAAC4G,UAAL,CAAiB,CACf,SAAS,MAAF,GAAa3H,EAAE,GAAF,EAAOwI,KAAP,EAApB,CACD,CAEDb,WAAaD,gBAAcC,UAAd,CAA0BC,QAA1B,CAAoC5H,CAApC,CAAb,CAEA,iBAAA,CACD;AAID,8BAAA,CAA6Be,KAA7B,CAAoCf,CAApC,CAAuC0I,MAAvC,CAA+C;;;;AAK7C,GAAI3H,MAAM4H,QAAN,CAAe,qBAAf,CAAJ,CAA2C,CACzC,OACD,CAED,YAAc7B,kBAAgB/F,MAAMwB,IAAN,EAAhB,CAAd,CAEA,GAAI4C,cAAYyD,OAAZ,EAAuB,EAA3B,CAA+B,CAC7B,WAAa5I,EAAE,GAAF,CAAOe,KAAP,EAAcY,MAA3B,CACA,eAAiB3B,EAAE,OAAF,CAAWe,KAAX,EAAkBY,MAAnC;AAGA,GAAImH,WAAaD,OAAS,CAA1B,CAA6B,CAC3B9H,MAAMJ,MAAN,GACA,OACD,CAED,kBAAoBiI,QAAQjH,MAA5B,CACA,aAAe3B,EAAE,KAAF,CAASe,KAAT,EAAgBY,MAA/B;;AAIA,GAAIoH,cAAgB,EAAhB,EAAsBC,WAAa,CAAvC,CAA0C,CACxCjI,MAAMJ,MAAN,GACA,OACD,CAED,YAAcyH,cAAYrH,KAAZ,CAAd;;;AAKA,GAAI2H,OAAS,EAAT,EAAeP,QAAU,GAAzB,EAAgCY,cAAgB,EAApD,CAAwD,CACtDhI,MAAMJ,MAAN,GACA,OACD;;AAID,GAAI+H,QAAU,EAAV,EAAgBP,QAAU,GAA9B,CAAmC;;;AAIjC,YAAcpH,MAAMP,GAAN,CAAU,CAAV,EAAaC,OAAb,CAAqBC,WAArB,EAAd,CACA,eAAiBD,UAAY,IAAZ,EAAoBA,UAAY,IAAjD,CACA,GAAIwI,UAAJ,CAAgB,CACd,iBAAmBlI,MAAMoI,IAAN,EAAnB,CACA,GAAID,cAAgBpC,kBAAgBoC,aAAa3G,IAAb,EAAhB,EAAqCwD,KAArC,CAA2C,CAAC,CAA5C,IAAmD,GAAvE,CAA4E,CAC1E,OACD,CACF,CAEDhF,MAAMJ,MAAN,GACA,OACD,CAED,gBAAkBX,EAAE,QAAF,CAAYe,KAAZ,EAAmBY,MAArC;AAGA,GAAIyH,YAAc,CAAd,EAAmBL,cAAgB,GAAvC,CAA4C,CAC1ChI,MAAMJ,MAAN,GACA,OACD,CACF,CACF;;;;;;;AASD,qBAAA,CAAsB4C,QAAtB,CAAgCvD,CAAhC,CAAmC,CACjCA,EAAEd,0BAAF,CAA4BqE,QAA5B,EAAsCrD,IAAtC,CAA2C,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CAChE,UAAYb,EAAEa,IAAF,CAAZ,CACA,WAAaoE,WAASlE,KAAT,CAAb,CACA,GAAI,CAAC2H,MAAL,CAAa,CACXA,OAASiJ,kBAAkB5Q,KAAlB,CAAyBf,CAAzB,CAAT,CACAgG,WAASjF,KAAT,CAAgBf,CAAhB,CAAmB0I,MAAnB,EACD;AAGD,GAAIA,OAAS,CAAb,CAAgB,CACd3H,MAAMJ,MAAN,GACD,CAFD,IAEO;AAEL8H,sBAAoB1H,KAApB,CAA2Bf,CAA3B,CAA8B0I,MAA9B,EACD,CACF,CAfD,EAiBA,QAAA,CACD,CAED,uBAAA,CAAsBnF,QAAtB,CAAgCvD,CAAhC,CAAmC,CACjC,UAAYiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAAhF,CAEAjN,EAAEZ,iBAAF,CAAmBmE,QAAnB,EAA6BrD,IAA7B,CAAkC,SAAUC,KAAV,CAAiB0
T,MAAjB,CAAyB,CACzD,YAAc7T,EAAE6T,MAAF,CAAd;;;;AAKA,GAAI7T,EAAE8T,OAAF,CAAWvQ,QAAX,EAAqBwQ,OAArB,CAA6B,GAA7B,EAAkCpS,MAAlC,GAA6C,CAAjD,CAAoD,CAClD,eAAehB,MAAR,EAAP,CACD;AAGD,GAAImG,kBAAgB9G,EAAE6T,MAAF,EAAUtR,IAAV,EAAhB,IAAsCyR,KAA1C,CAAiD,CAC/C,eAAerT,MAAR,EAAP,CACD;;AAID,GAAIkE,YAAU7E,EAAE6T,MAAF,CAAV,EAAuB,CAA3B,CAA8B,CAC5B,eAAelT,MAAR,EAAP,CACD,CAED,cAAA,CACD,CAtBD,EAwBA,QAAA,CACD;;AAKD,2BAAA,CAA4BuC,OAA5B,CAAqClD,CAArC,CAAwC;;;AAItCA,EAAIqQ,iBAAiBrQ,EAAE,MAAF,CAAjB,CAA4BA,CAA5B,CAA+B,KAA/B,CAAJ,CACAA,EAAIqQ,iBAAiBrQ,EAAE,MAAF,CAAjB,CAA4BA,CAA5B,CAA+B,KAA/B,CAAJ,CAEA,QAAA,CACD,qBAGD,qBAAA,CAAoBA,CAApB,CAAuBsJ,OAAvB,CAAgCzG,IAAhC,CAAsC0G,QAAtC,CAAgD,CAC9CvJ,EAAE,IAAM6C,IAAN,CAAa,GAAf,CAAoB0G,QAApB,EAA8BrJ,IAA9B,CAAmC,SAAUsJ,CAAV,CAAa3I,IAAb,CAAmB,CACpD,UAAYqB,WAASrB,IAAT,CAAZ,CACA,QAAUoB,MAAMY,IAAN,CAAV,CAEA,GAAI4G,GAAJ,CAAS,CACP,gBAAkBE,MAAIC,OAAJ,CAAYN,OAAZ,CAAqBG,GAArB,CAAlB,CACAyB,UAAQrK,IAAR,CAAcgC,IAAd,CAAoB6G,WAApB,EACD,CACF,CARD,EASD,CAED,6BAAA,CAA8BH,QAA9B,CAAwCvJ,CAAxC,CAA2CyJ,GAA3C,CAAgD,CAC9C,CAAC,MAAD,CAAS,KAAT,EAAgBK,OAAhB,CAAwB,SAAUjH,IAAV,CAAgB,CACtC,oBAAkB7C,CAAX,CAAcyJ,GAAd,CAAmB5G,IAAnB,CAAyB0G,QAAzB,CAAP,CACD,CAFD,EAIA,eAAA,CACD,CAED,qBAAA,CAAoBhH,IAApB,CAA0B,CACxB,YAAYuD,IAAL,GAAYiB,OAAZ,CAAoB,MAApB,CAA4B,GAA5B,EAAiCpF,MAAxC,CACD;;;AAKD,sBAAA,CAAqBZ,KAArB,CAA4B,CAC1B,oBAAsBwE,aAAWxE,MAAMwB,IAAN,EAAX,CAAtB,CAEA,aAAexB,MAAMyC,IAAN,CAAW,GAAX,EAAgBjB,IAAhB,EAAf,CACA,eAAiBgD,aAAWyE,QAAX,CAAjB,CAEA,GAAID,gBAAkB,CAAtB,CAAyB,CACvB,kBAAoBA,eAApB,CACD,CAFD,QAEWA,kBAAoB,CAApB,EAAyBE,WAAa,CAA1C,CAA6C,CAClD,QAAA,CACD,CAED,QAAA,CACD;;AAKD,2BAAA,CAA4BjK,CAA5B,CAA+BiU,SAA/B,CAA0CC,WAA1C,CAAuD,CACrD,iBAAmBjH,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,IAAvF,CAEA,eAAiBgH,UAAUE,MAAV,CAAiB,SAAUnJ,IAAV,CAAgB,CAChD,mBAAmBoJ,OAAZ,CAAoBpJ,IAApB,IAA8B,CAAC,CAAtC,CACD,CAFgB,CAAjB,CAIA,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBH,SAArB,CAEA,GAAI,CACF,UAAY,cAAA,EAAiB,CAC3B,SAAWwJ,MAAMpJ,KAAjB,CAEA,SAAW,MAAX,CACA,UAAY,OAAZ,CAEA,UA
AYjL,EAAE,QAAUsU,IAAV,CAAiB,IAAjB,CAAwBtJ,IAAxB,CAA+B,IAAjC,CAAZ;;;AAKA,WAAauJ,MAAM9V,GAAN,CAAU,SAAU0B,KAAV,CAAiBU,IAAjB,CAAuB,CAC5C,SAASA,IAAF,EAAQgC,IAAR,CAAaoI,KAAb,CAAP,CACD,CAFY,EAEVV,OAFU,GAEA4J,MAFA,CAEO,SAAU5R,IAAV,CAAgB,CAClC,cAAgB,EAAhB,CACD,CAJY,CAAb;;;;AAUA,GAAIiS,OAAO7S,MAAP,GAAkB,CAAtB,CAAyB,CACvB,cAAgB,MAAhB;;AAGA,GAAI8S,YAAJ,CAAkB,CAChBC,UAAYrK,YAAUmK,OAAO,CAAP,CAAV,CAAqBxU,CAArB,CAAZ,CACD,CAFD,IAEO,CACL0U,UAAYF,OAAO,CAAP,CAAZ,CACD,CAED,MAAO,CACLG,EAAGD,SADE,CAAP,CAGD,CACF,CAnCD,CAqCA,IAAK,cAAgBE,eAAaC,UAAb,CAAhB,CAA0CR,KAA/C,CAAsD,EAAES,0BAA4B,CAACT,MAAQU,UAAUxU,IAAV,EAAT,EAA2ByU,IAAzD,CAAtD,CAAsHF,0BAA4B,IAAlJ,CAAwJ,CACtJ,SAAWG,OAAX,CAEA,GAAI,CAAC,WAAA,GAAgB,WAAhB,CAA8B,WAA9B,CAA4CC,UAAQC,IAAR,CAA7C,IAAgE,QAApE,CAA8E,YAAYR,CAAZ,CAC/E;CAGD,MAAO1G,GAAP,CAAY,CACZmH,kBAAoB,IAApB,CACAC,eAAiBpH,GAAjB,CACD,CAhDD,OAgDU,CACR,GAAI,CACF,GAAI,CAAC6G,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CAED,qBAAA,CAAoBrU,KAApB,CAA2BoJ,WAA3B,CAAwC;;AAGtC,GAAIpJ,MAAMW,QAAN,GAAiBC,MAAjB,CAA0BwI,WAA9B,CAA2C,CACzC,YAAA,CACD;AAED,GAAIoL,iBAAiBxU,KAAjB,CAAJ,CAA6B,CAC3B,YAAA,CACD,CAED,WAAA,CACD;;;AAKD,gCAAA,CAAiCf,CAAjC,CAAoCwV,SAApC,CAA+C,CAC7C,gBAAkBvI,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,CAAtF,CACA,aAAeA,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,IAAnF,CACA,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBpC,SAArB,CAEA,GAAI,CACF,IAAK,cAAgB+J,eAAaY,SAAb,CAAhB,CAAyCnB,KAA9C,CAAqD,EAAES,0BAA4B,CAACT,MAAQU,UAAUxU,IAAV,EAAT,EAA2ByU,IAAzD,CAArD,CAAqHF,0BAA4B,IAAjJ,CAAuJ,CACrJ,aAAeT,MAAMpJ,KAArB,CAEA,UAAYjL,EAAEtB,QAAF,CAAZ;;AAIA,GAAI6V,MAAM5S,MAAN,GAAiB,CAArB,CAAwB,CACtB,UAAY3B,EAAEuU,MAAM,CAAN,CAAF,CAAZ,CAEA,GAAIrK,aAAWnJ,KAAX,CAAkBoJ,WAAlB,CAAJ,CAAoC,CAClC,YAAc,MAAd,CACA,GAAIsL,QAAJ,CAAc,CACZ7M,QAAU7H,MAAMwB,IAAN,EAAV,CACD,CAFD,IAEO,CACLqG,QAAU7H,MAAMsB,IAAN,EAAV,CACD,CAED,GAAIuG,OAAJ,CAAa,CACX,cAAA,CACD,CA
CF,CACF,CACF,CACF,CAAC,MAAOqF,GAAP,CAAY,CACZmH,kBAAoB,IAApB,CACAC,eAAiBpH,GAAjB,CACD,CA5BD,OA4BU,CACR,GAAI,CACF,GAAI,CAAC6G,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD;AAGD,oBAAA,CAAmB7S,IAAnB,CAAyBvC,CAAzB,CAA4B;;AAG1B,cAAgBA,EAAE,SAAWuC,IAAX,CAAkB,SAApB,EAA+BA,IAA/B,EAAhB,CACA,mBAAqB,EAAd,CAAmBA,IAAnB,CAA0B+H,SAAjC,CACD,CAED,yBAAA,CAA0BvJ,KAA1B,CAAiC,CAC/B,YAAcA,MAAMgB,OAAN,GAAgBwI,OAAhB,EAAd,CACA,kBAAoBxI,QAAQyB,IAAR,CAAa,SAAU8C,MAAV,CAAkB,CACjD,UAAYpE,WAASoE,MAAT,CAAZ,CACA,cAAgBrE,MAAMyI,KAAtB,CACI3F,GAAK9C,MAAM8C,EADf,CAGA,eAAiB0F,UAAY,GAAZ,CAAkB1F,EAAnC,CACA,kBAAkB6F,QAAX,CAAoB,SAApB,CAAP,CACD,CAPmB,CAApB,CASA,uBAAyBC,SAAzB,CACD;;;AAMD,2BAAA,CAA0B9J,KAA1B,CAAiC,CAC/B,aAAawB,IAAN,GAAauD,IAAb,GAAoBnE,MAApB,EAA8B,GAArC,CACD,CAED,sBAAA,CAAqB3B,CAArB,CAAwB,CACtB,SAASR,gBAAF,EAAkBmC,MAAlB,CAA2B,CAAlC,CACD,CAED,mBAAA,CAAkBd,IAAlB,CAAwB,CACtB,YAAcA,KAAKiK,OAAnB,CACIC,WAAalK,KAAKkK,UADtB,CAIA,GAAI,CAACD,OAAD,EAAYC,UAAhB,CAA4B,CAC1B,UAAYwF,mBAAiBxF,UAAjB,EAA6BtH,MAA7B,CAAoC,SAAUC,GAAV,CAAevD,KAAf,CAAsB,CACpE,SAAW4K,WAAW5K,KAAX,CAAX,CAEAuD,IAAIb,KAAKmI,IAAT,EAAiBnI,KAAKoI,KAAtB,CACA,UAAA,CACD,CALW,CAKT,EALS,CAAZ,CAMA,YAAA,CACD,CAED,cAAA,CACD,CAED,kBAAA,CAAiBpK,IAAjB,CAAuBgC,IAAvB,CAA6BsI,GAA7B,CAAkC,CAChC,GAAItK,KAAKiK,OAAT,CAAkB,CAChBjK,KAAKiK,OAAL,CAAajI,IAAb,EAAqBsI,GAArB,CACD,CAFD,QAEWtK,KAAKkK,UAAT,CAAqB,CAC1BlK,KAAKuK,YAAL,CAAkBvI,IAAlB,CAAwBsI,GAAxB,EACD,CAED,WAAA,CACD,qBAGD,mBAAA,CAAkBtK,IAAlB,CAAwBoB,KAAxB,CAA+B,CAC7B,GAAIpB,KAAKiK,OAAT,CAAkB,CAChBjK,KAAKiK,OAAL,CAAe7I,KAAf,CACD,CAFD,QAEWpB,KAAKkK,UAAT,CAAqB,CAC1B,MAAOlK,KAAKkK,UAAL,CAAgBpJ,MAAhB,CAAyB,CAAhC,CAAmC,CACjCd,KAAKyK,eAAL,CAAqBzK,KAAKkK,UAAL,CAAgB,CAAhB,EAAmBC,IAAxC,EACD,oBAAiB/I,KAAjB,EAAwB6H,OAAxB,CAAgC,SAAU1H,GAAV,CAAe,CAC9CvB,KAAKuK,YAAL,CAAkBhJ,GAAlB,CAAuBH,MAAMG,GAAN,CAAvB,EACD,CAFA,EAGF,CAED,WAAA,CACD;AAID,YAAc,UAAA,CAAW,WAAX,CAAwB,GAAxB,CAAd,CACA,aAAe,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAf,CAEA,mBAAqB,CAAC,QAAD
,CAAW,OAAX,CAAoB,MAApB,EAA4BxD,IAA5B,CAAiC,GAAjC,CAArB;;;;;AAOA,gCAAA,CAAiCoB,CAAjC,CAAoC,CAClCA,EAAE,KAAF,EAASE,IAAT,CAAc,SAAUsJ,CAAV,CAAagH,GAAb,CAAkB,CAC9B,UAAYtO,WAASsO,GAAT,CAAZ,CAEAD,mBAAiBtO,KAAjB,EAAwB6H,OAAxB,CAAgC,SAAUjH,IAAV,CAAgB,CAC9C,UAAYZ,MAAMY,IAAN,CAAZ,CAEA,GAAIA,OAAS,KAAT,EAAkB6S,QAAQvU,IAAR,CAAa8J,KAAb,CAAlB,EAAyC0K,SAASxU,IAAT,CAAc8J,KAAd,CAA7C,CAAmE,CACjEjL,EAAEwQ,GAAF,EAAO3N,IAAP,CAAY,KAAZ,CAAmBoI,KAAnB,EACD,CACF,CAND,EAOD,CAVD,EAYA,QAAA,CACD,CAED,kBAAA,CAAmB9K,KAAnB,CAA0BU,IAA1B,CAAgC,CAC9B,YAAYyT,IAAL,GAAc,SAArB,CACD,CAED,sBAAA,CAAuBtU,CAAvB,CAA0B,CACxBA,EAAE,GAAF,EAAOwI,KAAP,GAAehG,QAAf,GAA0B2R,MAA1B,CAAiCyB,SAAjC,EAA4CjV,MAA5C,GAEA,QAAA,CACD,CAED,cAAA,CAAeX,CAAf,CAAkB,CAChBA,EAAE6V,cAAF,EAAkBlV,MAAlB,GAEAX,EAAI8V,cAAc9V,CAAd,CAAJ,CACA,QAAA,CACD,CAED,aAAe;;;;;;AAQb+V,OAAQ,eAAA,CAAgBtM,GAAhB,CAAqBuM,gBAArB,CAAuCnH,SAAvC,CAAkD,CACxD,UAAY,IAAZ,CAEA,yBAAyB9B,oBAAoBC,IAApB,CAAyB,gBAAA,EAAmB,CACnE,UAAA,CAAYiJ,aAAZ,CACA,2BAA2B/I,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAAShE,IAAT,CAAgBgE,SAAS5M,IAAjC,EACE,MAAA,CACE2V,OAAS,MAAT,CAEA,GAAI,CAACF,gBAAL,CAAuB,CACrB7I,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MACD,CAED0V,cAAgB,CACd7H,cAAe,IADD,CAEdC,WAAY,GAFE,CAGdE,QAAS,CACP,eAAgB,WADT,CAEP,iBAAkB,GAFX,CAHK,CAAhB,CAUA2H,OAAS,CAAE/H,KAAM6H,gBAAR,CAA0B9H,SAAU+H,aAApC,CAAT,CACA9I,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MAEF,MAAA,CACE4M,SAAS5M,IAAT,CAAgB,CAAhB,CACA,uBAAuBkJ,GAAhB,CAAqBoF,SAArB,CAAP,CAEF,MAAA,CACEqH,OAAS/I,SAASsC,IAAlB,CAEF,MAAA,CACE,GAAI,CAACyG,OAAOvI,KAAZ,CAAmB,CACjBR,SAAS5M,IAAT,CAAgB,EAAhB,CACA,MACD,CAED,gBAAgBoP,MAAT,CAAgB,QAAhB,CAA0BuG,MAA1B,CAAP,CAEF,OAAA,CACE,gBAAgBvG,MAAT,CAAgB,QAAhB,CAA0BwG,MAAMC,WAAN,CAAkBF,MAAlB,CAA1B,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgB5I,IAAT,EAAP,CA3CJ,CA6CD,CACF,CAhDM,CAgDJwC,OAhDI,CAgDKqG,KAhDL,CAAP,CAiDD,CAnDwB,CAAlB,GAAP,CAoDD,CA/DY,CAgEbC,YAAa,oBAAA,CAAqB5I,IAArB,CAA2B,CACtC,YAAcA,KAAKW,IAAnB,CACID,SAAWV,KAAKU,QADpB,CAEA,gBAAkBA,SAASK,OAAT,CAAiB,cAAjB,CAAlB;;AAKA,GAAI,CAACC,YAAY5D,QAAZ,CAAqB,MAAr
B,CAAD,EAAiC,CAAC4D,YAAY5D,QAAZ,CAAqB,MAArB,CAAtC,CAAoE,CAClE,eAAM,CAAU,qCAAV,CAAN,CACD,CAED,MAAQyL,UAAQC,IAAR,CAAa1N,OAAb,CAAsB,CAAE2N,oBAAqB,IAAvB,CAAtB,CAAR,CAEA,GAAIvW,EAAE,GAAF,EAAOwI,KAAP,GAAe9G,QAAf,GAA0BC,MAA1B,GAAqC,CAAzC,CAA4C,CAC1C,eAAM,CAAU,kCAAV,CAAN,CACD,CAED3B,EAAIwW,kBAAkBxW,CAAlB,CAAJ,CACAA,EAAIyW,wBAAwBzW,CAAxB,CAAJ,CACAA,EAAI0W,MAAM1W,CAAN,CAAJ,CAEA,QAAA,CACD,CAvFY,CAAf,CA0FA,UAAY,cAAA,CAAe2W,SAAf,CAA0BC,OAA1B,CAAmC,CAC7C,eAAenT,MAAR,CAAe,SAAUC,GAAV,CAAemT,MAAf,CAAuB,CAC3CnT,IAAImT,MAAJ,EAAcF,SAAd,CACA,UAAA,CACD,CAHM,CAGJ,EAHI,CAAP,CAID,CALD,CAOA,8BAAA,CAA+BA,SAA/B,CAA0C,CACxC,iBAAiBG,gBAAV,CAA6BC,MAAMJ,SAAN,CAAiB,CAACA,UAAUE,MAAX,EAAmBlG,MAAnB,CAA0BC,qBAAmB+F,UAAUG,gBAA7B,CAA1B,CAAjB,CAA7B,CAA2HC,MAAMJ,SAAN,CAAiB,CAACA,UAAUE,MAAX,CAAjB,CAAlI,CACD,CAED,qBAAuB,CACrBA,OAAQ,cADa,CAErBjO,QAAS;;;AAIP4M,UAAW,CAAC,wBAAD,CAJJ;AAOPkB,MAAO,EAPA;AAUPM,WAAY,CACVC,SAAU,KADA,CAVL,CAFY,CAiBrBC,OAAQ,CACN1B,UAAW,CAAC,mBAAD,CADL,CAjBa,CAqBrBxB,MAAO,CACLwB,UAAW,CAAC,gBAAD,CADN,CArBc,CAyBrB2B,eAAgB,CACd3B,UAAW,CAAC,kBAAD,CADG,CAzBK,CAAvB,CA8BA,mBAAqB,CACnBqB,OAAQ,WADW,CAEnBjO,QAAS;AAEP4M,UAAW,CAAC,qBAAD,CAAwB,cAAxB,CAAwC,iBAAxC,CAFJ;AAKPkB,MAAO,CAAC,KAAD,CAAQ,uBAAR,CALA;;;;;;AAaPM,WAAY;AAEVI,GAAI,IAFM;AAKVH,SAAU,iBAAA,CAAkBlW,KAAlB,CAAyBf,CAAzB,CAA4B,CACpC,GAAIA,EAAEsC,OAAN,CAAe,CACb,cAAgBtC,EAAEe,MAAMwB,IAAN,EAAF,CAAhB,CAEA,GAAI8U,UAAU1V,MAAV,GAAqB,CAArB,EAA0B0V,UAAU7W,GAAV,CAAc,CAAd,IAAqBqK,SAA/C,EAA4DwM,UAAU7W,GAAV,CAAc,CAAd,EAAiBC,OAAjB,CAAyBC,WAAzB,KAA2C,KAA3G,CAAkH,CAChH,MAAO,QAAP,CACD,CACF,CAND,IAMO,CACL,eAAiBK,MAAMW,QAAN,EAAjB,CACA,GAAI4V,WAAW3V,MAAX,GAAsB,CAAtB,EAA2B2V,WAAW9W,GAAX,CAAe,CAAf,EAAkBC,OAAlB,GAA8B,KAA7D,CAAoE,CAClE,MAAO,QAAP,CACD,CACF,CAED,WAAA,CACD,CApBS,CAbL,CAFU,CAuCnBuT,MAAO,CACLwB,UAAW,CAAC,uBAAD,CAA0B,qBAA1B,CAAiD,IAAjD,CADN,CAvCY,CA2CnB0B,OAAQ,CACN1B,UAAW,CAAC,aAAD,CAAgB,sBAAhB,CADL,CA3CW,CA+CnB+B,IAAK,CACH/B,UAAW,CAAC,sBAAD,CADR,CA/Cc,CAmDnB2B,eAAgB,CACd3B,UAAW,CAAC,CAAC,kCAAD,CAAqC,UAArC,CAAD,CAAmD,wBAAnD,CADG,CAnDG,CAArB,CAwDA,uBAAyB,CA
CvBqB,OAAQ,eADe,CAEvBjO,QAAS,CACP4M,UAAW,CAAC,kBAAD,CADJ,CAGPgC,eAAgB,KAHT;AAMPR,WAAY,CACV,eAAgB,mBAAA,CAAoBjW,KAApB,CAA2B,CACzC,YAAcA,MAAMgB,OAAN,CAAc,UAAd,CAAd;AAEA,GAAI4E,QAAQjF,QAAR,CAAiB,KAAjB,EAAwBC,MAAxB,GAAmC,CAAvC,CAA0C,CACxCgF,QAAQ8Q,OAAR,CAAgB1W,KAAhB,EACD,CACF,CAPS,CAQV,mBAAoB,YARV,CASV,WAAY,QATF,CANL;AAmBP2V,MAAO,CAAC,iBAAD,CAAoB,oCAApB,CAA0D,MAA1D,CAAkE,SAAlE,CAnBA,CAFc,CAyBvBQ,OAAQ,wBAzBe,CA2BvBlD,MAAO,CACLwB,UAAW,CAAC,UAAD,CADN,CA3BgB,CA+BvB2B,eAAgB,CACd3B,UAAW,CAAC,sBAAD,CADG,CA/BO,CAAzB,CAqCA,qBAAuB,CACrBqB,OAAQ,aADa,CAGrBjO,QAAS,CACPoO,WAAY;;;;AAKV,wBAAyB,0BAAA,CAA2BjW,KAA3B,CAAkCf,CAAlC,CAAqC,CAC5D,WAAae,MAAMyC,IAAN,CAAW,QAAX,CAAb,CACA,oBAAsBxD,EAAE,iCAAF,CAAtB,CACA0X,gBAAgBzP,MAAhB,CAAuB0P,MAAvB,EACA5W,MAAMM,WAAN,CAAkBqW,eAAlB,EACD,CAVS;;AAcVE,EAAG,MAdO,CADL,CAkBPpC,UAAW,CAAC,uBAAD,CAlBJ,CAoBPgC,eAAgB,KApBT,CAsBPd,MAAO,CAAC,qBAAD,CAAwB,QAAxB,CAAkC,sBAAlC,CAtBA,CAHY,CA4BrBQ,OAAQ,CACN1B,UAAW,CAAC,kCAAD,CADL,CA5Ba,CAgCrB2B,eAAgB,CACd3B,UAAW,CAAC,CAAC,4CAAD,CAA+C,cAA/C,CAAD,CADG,CAhCK,CAAvB,CAsCA,qBAAuB,CACrBqB,OAAQ,iBADa,CAGrB7C,MAAO,CACLwB,UAAW,CAAC,aAAD,CAAgB,aAAhB,CADN,CAHc,CAOrB0B,OAAQ,CACN1B,UAAW,CAAC,CAAC,qBAAD,CAAwB,OAAxB,CAAD,CAAmC,WAAnC,CAAgD,SAAhD,CADL,CAPa,CAWrB5M,QAAS,CACP4M,UAAW,CAAC,cAAD,CAAiB,eAAjB,CADJ,CAGPgC,eAAgB,KAHT,CAKPR,WAAY,CACV,aAAc,iBAAA,CAAkBjW,KAAlB,CAAyB,CACrC,QAAUA,MAAM8B,IAAN,CAAW,KAAX,CAAV;;;;;;;;;AAUA,UAAY,GAAZ,CAEAgV,IAAMA,IAAI9Q,OAAJ,CAAY,UAAZ,CAAwBjE,KAAxB,CAAN,CACA/B,MAAM8B,IAAN,CAAW,KAAX,CAAkBgV,GAAlB,EACD,CAhBS,CALL,CAwBPnB,MAAO,CAAC,KAAD,CAAQ,qBAAR,CAA+B,2BAA/B,CAA4D,kBAA5D,CAAgF,mBAAhF,CAAqG,QAArG,CAA+G,kBAA/G,CAAmI,SAAnI,CAxBA,CAXY,CAsCrBS,eAAgB,IAtCK,CAwCrBW,eAAgB,IAxCK,CA0CrBP,IAAK,IA1CgB,CA4CrBQ,cAAe,IA5CM,CA8CrBC,QAAS,IA9CY,CAAvB;;AAmDA,yBAA2B,CACzBnB,OAAQ,qBADiB,CAEzB7C,MAAO,CACLwB,UAAW,CAAC,QAAD,CADN,CAFkB,CAMzB0B,OAAQ,CACN1B,UAAW,CAAC,0DAAD,CADL,CANiB,CAUzB5M,QAAS,CACP4M,UAAW,CAAC,eAAD,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,EAVA,CAVgB,CAuBzBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,gCAAD,CAAmC,UAAnC,CAAD,CADG,CAvBS,
CA2BzBsC,eAAgB,IA3BS,CA6BzBP,IAAK,IA7BoB,CA+BzBQ,cAAe,IA/BU,CAiCzBC,QAAS,IAjCgB,CAA3B;;;AAuCA,uBAAyB,CACvBnB,OAAQ,mBADe,CAEvB7C,MAAO,CACLwB,UAAW,CAAC,UAAD,CADN,CAFgB,CAMvB0B,OAAQ,CACN1B,UAAW,CAAC,eAAD,CADL,CANe,CAUvB5M,QAAS,CACP4M,UAAW,CAAC,iBAAD,CAAoB,iBAApB,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,EAVA,CAVc,CAuBvBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,qCAAD,CAAwC,OAAxC,CAAD,CADG,CAvBO,CA2BvBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BO,CA+BvB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/BkB,CAmCvBuC,cAAe,IAnCQ,CAqCvBC,QAAS,IArCc,CAAzB;;;AA2CA,mBAAqB,CACnBnB,OAAQ,eADW,CAEnB7C,MAAO,CACLwB,UAAW,CAAC,eAAD,CADN,CAFY,CAMnB0B,OAAQ,CACN1B,UAAW,CAAC,iBAAD,CADL,CANW,CAUnB5M,QAAS,CACP4M,UAAW,CAAC,iBAAD,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,CAAC,kBAAD,CAVA,CAVU,CAuBnBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,gCAAD,CAAmC,OAAnC,CAAD,CADG,CAvBG,CA2BnBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BG,CA+BnB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/Bc,CAmCnBuC,cAAe,IAnCI,CAqCnBC,QAAS,IArCU,CAArB;;;AA2CA,iBAAmB,CACjBnB,OAAQ,aADS,CAEjB7C,MAAO,CACLwB,UAAW,CAAC,IAAD,CADN,CAFU,CAMjB0B,OAAQ,CACN1B,UAAW,CAAC,qBAAD,CADL,CANS,CAUjB5M,QAAS,CACP4M,UAAW,CAAC,cAAD,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,CAAC,cAAD,CAVA,CAVQ,CAuBjBS,eAAgB,CACd3B,UAAW,CAAC,WAAD,CADG,CAvBC,CA2BjBsC,eAAgB,CACdtC,UAAW,EADG,CA3BC,CA+BjB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CA/BY,CAmCjBuC,cAAe,IAnCE,CAqCjBC,QAAS,IArCQ,CAAnB;;;AA2CA,mBAAqB,CACnBnB,OAAQ,eADW,CAEnB7C,MAAO,CACLwB,UAAW,CAAC,sBAAD,CADN,CAFY,CAMnB0B,OAAQ,CACN1B,UAAW,CAAC,oBAAD,CADL,CANW,CAUnB5M,QAAS,CACP4M,UAAW;AAEX,iBAFW,CADJ;;AAOPwB,WAAY,EAPL;;;AAYPN,MAAO,CAAC,iBAAD,CAZA,CAVU,CAyBnBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,qBAAD,CAAwB,UAAxB,CAAD,CADG,CAzBG,CA6BnBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA7BG,CAiCnB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CAjCc,CAqCnBuC,cAAe,IArCI,CAuCnBC,QAAS,IAvCU,CAArB;;;AA6CA,sBAAwB,CACtBnB,OAAQ,kBADc,CAEtB7C,MAAO,CACLwB,UAAW,CAAC,qBAAD,CADN,
CAFe,CAMtB0B,OAAQ,CACN1B,UAAW,CAAC,gCAAD,CAAmC,gBAAnC,CADL,CANc,CAUtB5M,QAAS,CACP4M,UAAW,CAAC,gBAAD,CADJ,CAGPgC,eAAgB,KAHT;;AAOPR,WAAY,CACViB,GAAI,GADM,CAPL;;;AAcPvB,MAAO,CAAC,oBAAD,CAAuB,uEAAvB,CAAgG,YAAhG,CAdA,CAVa,CA2BtBS,eAAgB,CACd3B,UAAW,CAAC,gBAAD,CADG,CA3BM,CA+BtBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA/BM,CAmCtB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CAnCiB,CAuCtBuC,cAAe,IAvCO,CAyCtBC,QAAS,IAzCa,CAAxB;;;AA+CA,mBAAqB,CACnBnB,OAAQ,kBADW,CAEnB7C,MAAO,CACLwB,UAAW,CAAC,gBAAD,CADN,CAFY,CAMnB0B,OAAQ,CACN1B,UAAW,CAAC,eAAD,CAAkB,KAAlB,CADL,CANW,CAUnB5M,QAAS,CACP4M,UAAW,CAAC,eAAD,CAAkB,gBAAlB,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,EAVA,CAVU,CAuBnBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,qCAAD,CAAwC,OAAxC,CAAD,CADG,CAvBG,CA2BnBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BG,CA+BnB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/Bc,CAmCnBuC,cAAe,IAnCI,CAqCnBC,QAAS,IArCU,CAArB;;;AA2CA,0BAA4B,CAC1BnB,OAAQ,sBADkB,CAE1B7C,MAAO,CACLwB,UAAW,CAAC,eAAD,CADN,CAFmB,CAM1B0B,OAAQ,CACN1B,UAAW,CAAC,CAAC,qBAAD,CAAwB,OAAxB,CAAD,CADL,CANkB,CAU1B5M,QAAS,CACP4M,UAAW;AAEX,mBAFW,CAEU,kBAFV,CADJ;;AAOPwB,WAAY,EAPL;;;AAYPN,MAAO,EAZA,CAViB,CAyB1BoB,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAzBU,CA6B1BuC,cAAe,IA7BW,CA+B1BC,QAAS,IA/BiB,CAA5B;;;AAqCA,sBAAwB,CACtBnB,OAAQ,kBADc,CAEtB7C,MAAO,CACLwB,UAAW;AAEX,CAAC,uBAAD,CAA0B,OAA1B,CAFW,CADN,CAFe,CAQtB0B,OAAQ,CACN1B,UAAW,CAAC,oCAAD,CADL,CARc,CAYtB5M,QAAS,CACP4M,UAAW;AAEX,qBAFW,CAEY,gBAFZ,CAE8B,aAF9B,CAE6C,aAF7C,CADJ;;AAOPwB,WAAY,EAPL;;;AAYPN,MAAO,CAAC,YAAD,CAZA,CAZa,CA2BtBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,+CAAD,CAAkD,UAAlD,CAAD,CADG,CA3BM,CA+BtBsC,eAAgB,CACdtC,UAAW;AAEX,CAAC,uBAAD,CAA0B,OAA1B,CAFW,CADG,CA/BM,CAqCtB+B,IAAK,CACH/B,UAAW,CAAC,CAAC,0BAAD,CAA6B,OAA7B,CAAD,CADR,CArCiB,CAyCtBuC,cAAe,IAzCO,CA2CtBC,QAAS,IA3Ca,CAAxB,CA8CA,sBAAwB,CACtBnB,OAAQ,cADc,CAGtBC,iBAAkB,CAAC,aAAD,CAAgB,gBAAhB,CAAkC,YAAlC,CAAgD,aAAhD,CAA+D,cAA/D,CAA+E,WAA/E,CAHI,CAKtB9C,MAAO,CACLwB,UAAW,CAAC,aAAD,CADN,CALe,CA
StB0B,OAAQ,CACN1B,UAAW,CAAC,SAAD,CADL,CATc,CAatB5M,QAAS,CACP4M,UAAW,CAAC,eAAD,CAAkB,gBAAlB,CADJ;;AAKPwB,WAAY,CACV,mDAAoD,6CAAA,CAA8CjW,KAA9C,CAAqD,CACvG,cAAgBA,MAAM8B,IAAN,CAAW,IAAX,EAAiBkQ,KAAjB,CAAuB,UAAvB,EAAmC,CAAnC,CAAhB,CACAhS,MAAM8B,IAAN,CAAW,KAAX,CAAkB,iCAAmCqV,SAArD,EACD,CAJS,CALL;;;AAePxB,MAAO,EAfA,CAba,CA+BtBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,wBAAD,CAA2B,UAA3B,CAAD,CADG,CA/BM,CAmCtBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAnCM,CAuCtB+B,IAAK,CACH/B,UAAW;CADR,CAvCiB,CA6CtBuC,cAAe,CACbvC,UAAW;CADE,CA7CO,CAmDtBwC,QAAS,CACPxC,UAAW;CADJ,CAnDa,CAAxB;;;AA6DA,2BAA6B,CAC3BqB,OAAQ,uBADmB,CAE3B7C,MAAO,CACLwB,UAAW,CAAC,kBAAD,CADN,CAFoB,CAM3B0B,OAAQ,CACN1B,UAAW,CAAC,uBAAD,CADL,CANmB,CAU3B5M,QAAS,CACP4M,UAAW,CAAC,2BAAD,CADJ;;AAKPwB,WAAY,EALL;;;AAUPN,MAAO,EAVA,CAVkB,CAuB3BS,eAAgB,CACd3B,UAAW,CAAC,CAAC,8BAAD,CAAiC,OAAjC,CAAD,CADG,CAvBW,CA2B3BsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CA3BW,CA+B3B+B,IAAK,CACH/B,UAAW,CAAC,CAAC,6BAAD,CAAgC,OAAhC,CAAD,CADR,CA/BsB,CAmC3BuC,cAAe,CACbvC,UAAW;CADE,CAnCY,CAyC3BwC,QAAS,CACPxC,UAAW;CADJ,CAzCkB,CAA7B;;;AAmDA,8BAAgC,CAC9BqB,OAAQ,0BADsB,CAE9B7C,MAAO,CACLwB,UAAW,CAAC,aAAD,CADN,CAFuB,CAM9B0B,OAAQ,CACN1B,UAAW,CAAC,mBAAD,CADL,CANsB,CAU9B5M,QAAS,CACP4M,UAAW,CAAC,mBAAD,CADJ;;AAKPwB,WAAY,CACV,iDAAkD,8CAAA,CAA+CjW,KAA/C,CAAsDf,CAAtD,CAAyD,CACzG,SAAWmY,KAAKpJ,KAAL,CAAWhO,MAAM8B,IAAN,CAAW,YAAX,CAAX,CAAX,CACA,QAAUuV,KAAKC,OAAL,CAAa,CAAb,EAAgBR,GAA1B,CAEA,SAAW7X,EAAE,SAAF,EAAa6C,IAAb,CAAkB,KAAlB,CAAyBgV,GAAzB,CAAX,CACA9W,MAAMM,WAAN,CAAkBqB,IAAlB,EACD,CAPS,CALL;;;AAkBPgU,MAAO,EAlBA,CAVqB,CA+B9BS,eAAgB,CACd3B,UAAW,CAAC,CAAC,kCAAD,CAAqC,UAArC,CAAD,CADG,CA/Bc,CAmC9BsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAnCc,CAuC9B+B,IAAK,CACH/B,UAAW,CAAC,CAAC,wBAAD,CAA2B,OAA3B,CAAD,CADR,CAvCyB,CA2C9BuC,cAAe,CACbvC,UAAW;CADE,CA3Ce,CAiD9BwC,QAAS,CACPxC,UAAW;CADJ,CAjDqB,CAAhC,CAwDA,oBAAsB,CACpBqB,OAAQ,YADY,CAGpBC,iBAAkB,CAAC,4BAAD,CAHE,CAKpB9C,MAAO,CACLwB,UAAW,CAAC,IAAD,CADN,CALa,CASpB0B,OAAQ,CACN1B,UAAW,CAAC,CAAC,qBAAD
,CAAwB,OAAxB,CAAD,CADL,CATY,CAapB5M,QAAS,CACP4M,UAAW,CAAC,kBAAD,CADJ;;AAKPwB,WAAY;AAEVsB,OAAQ,eAAA,CAAgBvX,KAAhB,CAAuB,CAC7B,SAAW,kEAAX,CACA,UAAYwX,mBAAmBxX,MAAM8B,IAAN,CAAW,gBAAX,CAAnB,CAAZ,CAEA,GAAI2V,KAAKrX,IAAL,CAAUsX,KAAV,CAAJ,CAAsB,CACpB,iBAAmBA,MAAMrT,KAAN,CAAYoT,IAAZ,CAAnB,CACIE,cAAgBrG,iBAAesG,YAAf,CAA6B,CAA7B,CADpB,CAEInP,EAAIkP,cAAc,CAAd,CAFR,CAGIR,UAAYQ,cAAc,CAAd,CAHhB;AAMA3X,MAAM8B,IAAN,CAAW,KAAX,CAAkB,iCAAmCqV,SAArD,EACA,YAAcnX,MAAMgB,OAAN,CAAc,QAAd,CAAd,CACA4E,QAAQ8Q,OAAR,CAAgB1W,MAAM6X,KAAN,EAAhB,EACA7X,MAAMJ,MAAN,GACD,CACF,CAlBS,CALL;;;AA6BP+V,MAAO,EA7BA,CAbW,CA6CpBS,eAAgB,CACd3B,UAAW,CAAC,CAAC,gBAAD,CAAmB,UAAnB,CAAD,CADG,CA7CI,CAiDpBsC,eAAgB,CACdtC,UAAW,CAAC,CAAC,uBAAD,CAA0B,OAA1B,CAAD,CADG,CAjDI,CAqDpB+B,IAAK,CACH/B,UAAW;CADR,CArDe,CA2DpBuC,cAAe,CACbvC,UAAW;CADE,CA3DK,CAiEpBwC,QAAS,CACPxC,UAAW;CADJ,CAjEW,CAAtB,CA0EA,qBAAuB,eAAc,CACpCqD,iBAAkBA,gBADkB,CAEpCC,eAAgBA,cAFoB,CAGpCC,mBAAoBA,kBAHgB,CAIpCC,iBAAkBA,gBAJkB,CAKpCC,iBAAkBA,gBALkB,CAMpCC,qBAAsBA,oBANc,CAOpCC,mBAAoBA,kBAPgB,CAQpCC,eAAgBA,cARoB,CASpCC,aAAcA,YATsB,CAUpCC,eAAgBA,cAVoB,CAWpCC,kBAAmBA,iBAXiB,CAYpCC,eAAgBA,cAZoB,CAapCC,sBAAuBA,qBAba,CAcpCC,kBAAmBA,iBAdiB,CAepCC,kBAAmBA,iBAfiB,CAgBpCC,uBAAwBA,sBAhBY,CAiBpCC,0BAA2BA,yBAjBS,CAkBpCC,gBAAiBA,eAlBmB,CAAd,CAAvB,CAqBA,eAAiBC,aAAaC,gBAAb,EAA+BvW,MAA/B,CAAsC,SAAUC,GAAV,CAAetB,GAAf,CAAoB,CACzE,cAAgB4X,iBAAiB5X,GAAjB,CAAhB,CACA,kBAAgB,EAAT,CAAasB,GAAb,CAAkBuW,sBAAsBtD,SAAtB,CAAlB,CAAP,CACD,CAHgB,CAGd,EAHc,CAAjB;AAMA,oBAAsB,wCAAtB;;AAIA,iBAAmB,UAAA,CAAW,aAAX,CAA0B,GAA1B,CAAnB;;;;;;;;;;;;;;;AAoBA,mBAAqB,WAArB,CACA,oBAAsB,WAAtB,CACA,yBAA2B,4BAA3B,CACA,2BAA6B,oBAA7B,CACA,0BAA4B,QAA5B,CACA,WAAa,CAAC,KAAD,CAAQ,KAAR,CAAe,KAAf,CAAsB,KAAtB,CAA6B,KAA7B,CAAoC,KAApC,CAA2C,KAA3C,CAAkD,KAAlD,CAAyD,KAAzD,CAAgE,KAAhE,CAAuE,KAAvE,CAA8E,KAA9E,CAAb,CACA,cAAgBuD,OAAOtb,IAAP,CAAY,GAAZ,CAAhB,CACA,eAAiB,qCAAjB,CACA,eAAiB,wCAAjB,CACA,sBAAwB,UAAA,CAAW,IAAMub,UAAN,CAAmB,KAAnB,CAA2BC,UAA3B,CAAwC,kBAAxC,CAA6DC,SAA7D,CAAyE,GAApF,CAAyF,IAAzF,CAAxB;;;AAKA,uBAAyB,gBAAzB,CAEA,s
BAAwB,UAAA,CAAW,2BAAX,CAAwC,GAAxC,CAAxB;;AAIA,oBAAA,CAAqBnD,MAArB,CAA6B,CAC3B,cAAcnQ,OAAP,CAAeuT,eAAf,CAAgC,IAAhC,EAAsCxU,IAAtC,EAAP,CACD,CAED,gBAAA,CAAiByU,YAAjB,CAA+B,CAC7BA,aAAeA,aAAazU,IAAb,EAAf,CACA,GAAI0U,WAASC,QAAT,CAAkBF,YAAlB,CAAJ,CAAqC,CACnC,mBAAA,CACD,CAED,WAAA,CACD;;AAID,iBAAA,CAAkBhD,GAAlB,CAAuB/J,IAAvB,CAA6B,CAC3B,MAAQA,KAAKxN,CAAb,CACIgY,QAAUxK,KAAKwK,OADnB;AAIA,GAAIT,IAAI5V,MAAJ,CAAa,IAAb,EAAqB4V,IAAI5V,MAAJ,CAAa,CAAtC,CAAyC,WAAA;AAGzC,GAAIqW,SAAW0C,iBAAe1C,OAAf,CAAwB,EAAxB,IAAgC0C,iBAAenD,GAAf,CAAoB,EAApB,CAA/C,CAAwE,WAAA,CAExE,YAAclN,YAAUkN,GAAV,CAAevX,CAAf,CAAd;;AAIA,GAAI2a,aAAaxZ,IAAb,CAAkByZ,OAAlB,CAAJ,CAAgC,WAAA,CAEhC,eAAe9U,IAAR,EAAP,CACD;;;AAMD,wBAAA,CAAyB+U,UAAzB,CAAqC,CACnC,MAAO,CAACA,WAAWzV,KAAX,CAAiB0V,iBAAjB,GAAuC,EAAxC,EAA4Clc,IAA5C,CAAiD,GAAjD,EAAsDmI,OAAtD,CAA8DgU,qBAA9D,CAAqF,GAArF,EAA0FhU,OAA1F,CAAkGiU,sBAAlG,CAA0H,UAA1H,EAAsIjU,OAAtI,CAA8IkU,oBAA9I,CAAoK,IAApK,EAA0KnV,IAA1K,EAAP,CACD;;AAID,2BAAA,CAA4B+U,UAA5B,CAAwC;AAEtC,GAAIK,eAAe/Z,IAAf,CAAoB0Z,UAApB,GAAmCM,gBAAgBha,IAAhB,CAAqB0Z,UAArB,CAAvC,CAAyE,CACvEA,WAAajY,SAASiY,UAAT,CAAqB,EAArB,CAAb,CACD,CAED,SAAWO,SAAO,QAAA,CAASP,UAAT,CAAP,CAAX,CAEA,GAAI,CAACQ,KAAKC,OAAL,EAAL,CAAqB,CACnBT,WAAaU,gBAAgBV,UAAhB,CAAb,CACAQ,KAAOD,SAAO,QAAA,CAASP,UAAT,CAAP,CAAP,CACD,CAED,YAAYS,OAAL,GAAiBD,KAAKG,WAAL,EAAjB,CAAsC,IAA7C,CACD;AAID,yBAAA,CAA0BtY,OAA1B,CAAmCsK,IAAnC,CAAyC,CACvC,MAAQA,KAAKxN,CAAb,CACIyb,sBAAwBjO,KAAKkO,kBADjC,CAEIA,mBAAqBD,wBAA0B5Q,SAA1B,CAAsC,IAAtC,CAA6C4Q,qBAFtE,CAGIE,WAAanO,KAAKwG,KAHtB,CAIIA,MAAQ2H,aAAe9Q,SAAf,CAA2B,EAA3B,CAAgC8Q,UAJ5C,CAKIC,SAAWpO,KAAK/D,GALpB,CAMIA,IAAMmS,WAAa/Q,SAAb,CAAyB,EAAzB,CAA8B+Q,QANxC,CAOIC,oBAAsBrO,KAAKgK,cAP/B,CAQIA,eAAiBqE,sBAAwBhR,SAAxB,CAAoC,IAApC,CAA2CgR,mBARhE;;AAYAC,mBAAmB5Y,OAAnB,CAA4BlD,CAA5B;;;AAKA,GAAIwX,cAAJ,CAAoBuE,cAAY7Y,OAAZ,CAAqBlD,CAArB;;;AAKpBgc,aAAW9Y,OAAX,CAAoBlD,CAApB,CAAuByJ,GAAvB;;AAIAxG,gBAAcC,OAAd,CAAuBlD,CAAvB;;;AAKAic,cAAc/Y,OAAd,CAAuBlD,CAAvB;AAGAkc,eAAahZ,OAAb,CAAsBlD,CAAtB,CAAyBgU,KAAzB;AAGAmI,qBAAqBjZ,OAArB,CAA8BlD,CAA9B,C
AAiCyJ,GAAjC;;;;AAMA,GAAI+N,cAAJ,CAAoB/C,aAAavR,OAAb,CAAsBlD,CAAtB,CAAyB0b,kBAAzB;AAGpBU,cAAYlZ,OAAZ,CAAqBlD,CAArB;AAGAqc,mBAAmBnZ,OAAnB,CAA4BlD,CAA5B,EAEA,cAAA,CACD,CAED,sBAAA,CAAuBgU,KAAvB,CAA8BxG,IAA9B,CAAoC,CAClC,QAAUA,KAAK/D,GAAf,CACIzJ,EAAIwN,KAAKxN,CADb;;AAKA,GAAIsc,mBAAmBnb,IAAnB,CAAwB6S,KAAxB,CAAJ,CAAoC,CAClCA,MAAQuI,kBAAkBvI,KAAlB,CAAyBvK,GAAzB,CAAR,CACD;;AAID,GAAIuK,MAAMrS,MAAN,CAAe,GAAnB,CAAwB;AAEtB,OAAS3B,EAAE,IAAF,CAAT,CACA,GAAIoX,GAAGzV,MAAH,GAAc,CAAlB,CAAqB,CACnBqS,MAAQoD,GAAG7U,IAAH,EAAR,CACD,CACF;AAGD,mBAAiByR,KAAV,CAAiBhU,CAAjB,EAAoB8F,IAApB,EAAP,CACD,CAED,+BAAA,CAAgC0W,UAAhC,CAA4Cja,IAA5C,CAAkD;;;AAIhD,GAAIia,WAAW7a,MAAX,EAAqB,CAAzB,CAA4B,CAC1B,SAAW,UAAY;;;AAIrB,eAAiB6a,WAAW/Y,MAAX,CAAkB,SAAUC,GAAV,CAAe+Y,SAAf,CAA0B,CAC3D/Y,IAAI+Y,SAAJ,EAAiB/Y,IAAI+Y,SAAJ,EAAiB/Y,IAAI+Y,SAAJ,EAAiB,CAAlC,CAAsC,CAAvD,CACA,UAAA,CACD,CAHgB,CAGd,EAHc,CAAjB,CAKA,0BAA4BlM,mBAAiBmM,UAAjB,EAA6BjZ,MAA7B,CAAoC,SAAUC,GAAV,CAAetB,GAAf,CAAoB,CAClF,GAAIsB,IAAI,CAAJ,EAASgZ,WAAWta,GAAX,CAAb,CAA8B,CAC5B,MAAO,CAACA,GAAD,CAAMsa,WAAWta,GAAX,CAAN,CAAP,CACD,CAED,UAAA,CACD,CAN2B,CAMzB,CAAC,CAAD,CAAI,CAAJ,CANyB,CAA5B,CAOIua,uBAAyBtK,iBAAeuK,qBAAf,CAAsC,CAAtC,CAP7B,CAQIC,QAAUF,uBAAuB,CAAvB,CARd,CASIG,UAAYH,uBAAuB,CAAvB,CAThB;;;;AAiBA,GAAIG,WAAa,CAAb,EAAkBD,QAAQlb,MAAR,EAAkB,CAAxC,CAA2C,CACzC6a,WAAaja,KAAKwQ,KAAL,CAAW8J,OAAX,CAAb,CACD,CAED,cAAgB,CAACL,WAAW,CAAX,CAAD,CAAgBA,WAAWzW,KAAX,CAAiB,CAAC,CAAlB,CAAhB,CAAhB,CACA,eAAiBgX,UAAUtZ,MAAV,CAAiB,SAAUC,GAAV,CAAe2J,GAAf,CAAoB,CACpD,WAAW1L,MAAJ,CAAa0L,IAAI1L,MAAjB,CAA0B+B,GAA1B,CAAgC2J,GAAvC,CACD,CAFgB,CAEd,EAFc,CAAjB,CAIA,GAAI2P,WAAWrb,MAAX,CAAoB,EAAxB,CAA4B,CAC1B,MAAO,CACLgT,EAAGqI,UADE,CAAP,CAGD,CAED,MAAO,CACLrI,EAAGpS,IADE,CAAP,CAGD,CA5CU,EAAX,CA8CA,GAAI,CAAC,WAAA,GAAgB,WAAhB,CAA8B,WAA9B,CAA4C2S,UAAQC,IAAR,CAA7C,IAAgE,QAApE,CAA8E,YAAYR,CAAZ,CAC/E,CAED,WAAA,CACD,CAED,6BAAA,CAA8B6H,UAA9B,CAA0C/S,GAA1C,CAA+C;;;;;;AAO7C,eAAiBE,MAAIoF,KAAJ,CAAUtF,GAAV,CAAjB,CACIwJ,KAAOvC,WAAWuC,IADtB,CAGA,gBAAkBA,KAAKlM,OAAL,CAAakW,iBAAb,CAAgC,EAAhC,CAAlB,CAEA,c
AAgBT,WAAW,CAAX,EAAc9b,WAAd,GAA4BqG,OAA5B,CAAoC,GAApC,CAAyC,EAAzC,CAAhB,CACA,mBAAqBmW,QAAMC,WAAN,CAAkBC,SAAlB,CAA6BC,WAA7B,CAArB,CAEA,GAAIC,eAAiB,GAAjB,EAAwBF,UAAUzb,MAAV,CAAmB,CAA/C,CAAkD,CAChD,kBAAkBoE,KAAX,CAAiB,CAAjB,EAAoBnH,IAApB,CAAyB,EAAzB,CAAP,CACD,CAED,YAAc4d,WAAWzW,KAAX,CAAiB,CAAC,CAAlB,EAAqB,CAArB,EAAwBrF,WAAxB,GAAsCqG,OAAtC,CAA8C,GAA9C,CAAmD,EAAnD,CAAd,CACA,iBAAmBmW,QAAMC,WAAN,CAAkBI,OAAlB,CAA2BF,WAA3B,CAAnB,CAEA,GAAIG,aAAe,GAAf,EAAsBD,QAAQ5b,MAAR,EAAkB,CAA5C,CAA+C,CAC7C,kBAAkBoE,KAAX,CAAiB,CAAjB,CAAoB,CAAC,CAArB,EAAwBnH,IAAxB,CAA6B,EAA7B,CAAP,CACD,CAED,WAAA,CACD;;AAID,0BAAA,CAA2BoV,KAA3B,CAAkC,CAChC,QAAU/G,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAA9E;;AAIA,eAAiB+G,MAAMjB,KAAN,CAAYuJ,kBAAZ,CAAjB,CACA,GAAIE,WAAW7a,MAAX,GAAsB,CAA1B,CAA6B,CAC3B,YAAA,CACD,CAED,aAAe8b,uBAAuBjB,UAAvB,CAAmCxI,KAAnC,CAAf,CACA,GAAI0J,QAAJ,CAAc,eAAA,CAEdA,SAAWC,qBAAqBnB,UAArB,CAAiC/S,GAAjC,CAAX,CACA,GAAIiU,QAAJ,CAAc,eAAA;;AAId,YAAA,CACD,CAED,aAAe,CACbxG,OAAQ0G,WADK,CAEb9F,eAAgB+F,OAFH,CAGbtG,IAAKuG,QAHQ,CAIb3G,eAAgB4G,kBAJH,CAKbnV,QAASoV,gBALI,CAMbhK,MAAOiK,aANM,CAAf;;;;;;;;;;;AAoBA,wBAAA,CAAyBje,CAAzB,CAA4Bke,IAA5B,CAAkC;;;;AAMhC,GAAIA,KAAKC,uBAAT,CAAkC,CAChCne,EAAIme,0BAAwBne,CAAxB,CAAJ,CACD,CAEDA,EAAIoe,uBAAuBpe,CAAvB,CAAJ,CACAA,EAAIqe,gBAAgBre,CAAhB,CAAmBke,KAAK3X,WAAxB,CAAJ,CACA,kBAAoB+X,oBAAoBte,CAApB,CAApB,CAEA,oBAAA,CACD,CAED,4BAA8B,CAC5Bue,YAAa,CACXJ,wBAAyB,IADd,CAEX5X,YAAa,IAFF,CAGXmV,mBAAoB,IAHT,CADe;;;;;;;;;;;;;;;;;;;AA0B5B8C,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB0Q,IAAvB,CAA6B,CACpC,MAAQ1Q,KAAKxN,CAAb,CACIqC,KAAOmL,KAAKnL,IADhB,CAEI2R,MAAQxG,KAAKwG,KAFjB,CAGIvK,IAAM+D,KAAK/D,GAHf,CAIIgV,WAAajR,KAAK6I,OAJtB,CAMA6H,KAAOjP,WAAS,EAAT,CAAa,KAAKsP,WAAlB,CAA+BL,IAA/B,CAAP,CAEAle,EAAIA,GAAKye,WAAWnI,IAAX,CAAgBjU,IAAhB,CAAT;;AAIA,SAAW,KAAKqc,cAAL,CAAoB1e,CAApB,CAAuBgU,KAAvB,CAA8BvK,GAA9B,CAAmCyU,IAAnC,CAAX,CAEA,GAAIS,mBAAiB9d,IAAjB,CAAJ,CAA4B,CAC1B,YAAY+d,kBAAL,CAAwB/d,IAAxB,CAA8Bb,CAA9B,CAAP,CACD;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA
,mBAAqB6K,SAArB,CAEA,GAAI,CACF,IAAK,cAAgB+J,eAAarE,mBAAiB2N,IAAjB,EAAuB/J,MAAvB,CAA8B,SAAU0K,CAAV,CAAa,CAC3E,YAAYA,CAAL,IAAY,IAAnB,CACD,CAFiC,CAAb,CAAhB,CAEAxK,KAFL,CAEY,EAAES,0BAA4B,CAACT,MAAQU,UAAUxU,IAAV,EAAT,EAA2ByU,IAAzD,CAFZ,CAE4EF,0BAA4B,IAFxG,CAE8G,CAC5G,QAAUT,MAAMpJ,KAAhB,CAEAiT,KAAK9b,GAAL,EAAY,KAAZ,CACApC,EAAIye,WAAWnI,IAAX,CAAgBjU,IAAhB,CAAJ,CAEAxB,KAAO,KAAK6d,cAAL,CAAoB1e,CAApB,CAAuBgU,KAAvB,CAA8BvK,GAA9B,CAAmCyU,IAAnC,CAAP,CAEA,GAAIS,mBAAiB9d,IAAjB,CAAJ,CAA4B,CAC1B,MACD,CACF,CACF,CAAC,MAAOoN,GAAP,CAAY,CACZmH,kBAAoB,IAApB,CACAC,eAAiBpH,GAAjB,CACD,CAlBD,OAkBU,CACR,GAAI,CACF,GAAI,CAAC6G,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,YAAYwJ,kBAAL,CAAwB/d,IAAxB,CAA8Bb,CAA9B,CAAP,CACD,CAlF2B;AAsF5B0e,eAAgB,uBAAA,CAAwB1e,CAAxB,CAA2BgU,KAA3B,CAAkCvK,GAAlC,CAAuCyU,IAAvC,CAA6C,CAC3D,wBAAwBY,gBAAgB9e,CAAhB,CAAmBke,IAAnB,CAAjB,CAA2C,CAChDle,EAAGA,CAD6C,CAEhD0b,mBAAoBwC,KAAKxC,kBAFuB,CAGhD1H,MAAOA,KAHyC,CAIhDvK,IAAKA,GAJ2C,CAA3C,CAAP,CAMD,CA7F2B;;;AAmG5BmV,mBAAoB,2BAAA,CAA4B/d,IAA5B,CAAkCb,CAAlC,CAAqC,CACvD,GAAI,CAACa,IAAL,CAAW,CACT,WAAA,CACD,CAED,yBAAuBb,EAAEqC,IAAF,CAAOxB,IAAP,CAAhB,CAAP;;;;CAxG0B,CAA9B;;;;;;AAwHA,2BAA6B,CAAC,iBAAD,CAAoB,UAApB,CAAgC,SAAhC,CAA2C,UAA3C,CAAuD,OAAvD,CAA7B;;AAIA,yBAA2B,CAAC,UAAD,CAA3B;;;;;;;AASA,2BAA6B,CAAC,sBAAD,CAAyB,kBAAzB,CAA6C,kBAA7C,CAAiE,YAAjE,CAA+E,mBAA/E,CAAoG,cAApG,CAA7B,CAEA,yBAA2B,CAAC,YAAD,CAAe,cAAf,CAA+B,cAA/B,CAA+C,aAA/C,CAA8D,aAA9D,CAA6E,aAA7E,CAA4F,aAA5F,CAA2G,eAA3G,CAA4H,eAA5H,CAA6I,iBAA7I,CAAgK,UAAhK,CAA4K,YAA5K,CAA0L,IAA1L,CAAgM,iBAAhM,CAAmN,OAAnN,CAA3B,CAEA,0BAA4B,CAC1B2d,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACIyJ,IAAM+D,KAAK/D,GADf,CAEIsV,UAAYvR,KAAKuR,SAFrB;;AAMA,UAAY,MAAZ,CAEA/K,MAAQgL,mBAAmBhf,CAAnB,CAAsBif,sBAAtB,CAA8CF,SAA9C,CAAR,CACA,GAAI/K,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAEvK,IAAKA,GAAP,CAAYzJ,EAAGA,CAAf,CAArB,CAAP;;AAIXgU,MAAQkL,wBAAwBlf,CAAxB,CAA2Bmf,sBAA3B,CAAR,CACA,GAAInL,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAEvK,I
AAKA,GAAP,CAAYzJ,EAAGA,CAAf,CAArB,CAAP;AAGXgU,MAAQgL,mBAAmBhf,CAAnB,CAAsBof,oBAAtB,CAA4CL,SAA5C,CAAR,CACA,GAAI/K,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAEvK,IAAKA,GAAP,CAAYzJ,EAAGA,CAAf,CAArB,CAAP;AAGXgU,MAAQkL,wBAAwBlf,CAAxB,CAA2Bqf,oBAA3B,CAAR,CACA,GAAIrL,KAAJ,CAAW,qBAAqBA,KAAd,CAAqB,CAAEvK,IAAKA,GAAP,CAAYzJ,EAAGA,CAAf,CAArB,CAAP;AAGX,MAAO,EAAP,CACD,CA5ByB,CAA5B;;;;;;AAqCA,qBAAuB,CAAC,KAAD,CAAQ,OAAR,CAAiB,WAAjB,CAA8B,eAA9B,CAA+C,YAA/C,CAA6D,WAA7D,CAA0E,SAA1E,CAAvB,CAEA,sBAAwB,GAAxB;;;;;;;AASA,qBAAuB,CAAC,sBAAD,CAAyB,mBAAzB,CAA8C,oBAA9C,CAAoE,mBAApE,CAAyF,oBAAzF,CAA+G,qBAA/G,CAAsI,aAAtI,CAAqJ,iBAArJ,CAAwK,oBAAxK,CAA8L,qBAA9L,CAAqN,eAArN,CAAsO,YAAtO,CAAoP,YAApP,CAAkQ,cAAlQ,CAAkR,cAAlR,CAAkS,yBAAlS,CAA6T,qBAA7T,CAAoV,qBAApV,CAA2W,SAA3W,CAAsX,SAAtX,CAAiY,gBAAjY,CAAmZ,gBAAnZ,CAAqa,SAAra,CAAvB;;AAIA,aAAe,aAAf,CACA,wBAA0B,CAAC,CAAC,SAAD,CAAYsf,QAAZ,CAAD,CAAwB,CAAC,SAAD,CAAYA,QAAZ,CAAxB,CAA1B,CAEA,2BAA6B,CAC3Bd,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACI+e,UAAYvR,KAAKuR,SADrB,CAGA,WAAa,MAAb;;AAIA7H,OAAS8H,mBAAmBhf,CAAnB,CAAsBuf,gBAAtB,CAAwCR,SAAxC,CAAT,CACA,GAAI7H,QAAUA,OAAOvV,MAAP,CAAgB6d,iBAA9B,CAAiD,CAC/C,mBAAmBtI,MAAZ,CAAP,CACD;AAGDA,OAASgI,wBAAwBlf,CAAxB,CAA2Byf,gBAA3B,CAA6C,CAA7C,CAAT,CACA,GAAIvI,QAAUA,OAAOvV,MAAP,CAAgB6d,iBAA9B,CAAiD,CAC/C,mBAAmBtI,MAAZ,CAAP,CACD;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBrM,SAArB,CAEA,GAAI,CACF,IAAK,cAAgB+J,eAAa8K,mBAAb,CAAhB,CAAmDrL,KAAxD,CAA+D,EAAES,0BAA4B,CAACT,MAAQU,UAAUxU,IAAV,EAAT,EAA2ByU,IAAzD,CAA/D,CAA+HF,0BAA4B,IAA3J,CAAiK,CAC/J,UAAYT,MAAMpJ,KAAlB,CAEA,UAAYoH,iBAAesN,KAAf,CAAsB,CAAtB,CAAZ,CAEA,aAAe7Q,MAAM,CAAN,CAAf,CACA,UAAYA,MAAM,CAAN,CAAZ,CAEA,SAAW9O,EAAEtB,QAAF,CAAX,CACA,GAAImC,KAAKc,MAAL,GAAgB,CAApB,CAAuB,CACrB,SAAWd,KAAK0B,IAAL,EAAX,CACA,GAAIqd,MAAMze,IAAN,CAAWoB,IAAX,CAAJ,CAAsB,CACpB,mBAAmBA,IAAZ,CAAP,CACD,CACF,CACF,CACF,CAAC,MAAO0L,GAAP,CAAY,CACZmH,kBAAoB,IAApB,CACAC,eAAiBpH,GAAjB,CACD,CApBD,OAoBU,CACR,GAAI,CACF,GAAI,CAAC6G,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF
,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CA3D0B,CAA7B;;;;AAkEA,6BAA+B,CAAC,wBAAD,CAA2B,aAA3B,CAA0C,SAA1C,CAAqD,gBAArD,CAAuE,WAAvE,CAAoF,cAApF,CAAoG,UAApG,CAAgH,UAAhH,CAA4H,SAA5H,CAAuI,eAAvI,CAAwJ,UAAxJ,CAAoK,cAApK,CAAoL,qBAApL,CAA2M,cAA3M,CAA2N,SAA3N,CAAsO,MAAtO,CAA/B;;;AAKA,6BAA+B,CAAC,4BAAD,CAA+B,oBAA/B,CAAqD,0BAArD,CAAiF,kBAAjF,CAAqG,oBAArG,CAA2H,kBAA3H,CAA+I,iBAA/I,CAAkK,aAAlK,CAAiL,eAAjL,CAAkM,qBAAlM,CAAyN,mBAAzN,CAA8O,cAA9O,CAA8P,aAA9P,CAA6Q,YAA7Q,CAA2R,kBAA3R,CAA+S,WAA/S,CAA4T,UAA5T,CAA/B;;;AAKA,oBAAsB,mDAAtB,CACA,2BAA6B;AAE7B,UAAA,CAAW,4BAAX,CAAyC,GAAzC,CAF6B;;;AAM7B,UAAA,CAAW,6BAAX,CAA0C,GAA1C,CAN6B;AAQ7B,UAAA,CAAW,cAAgByK,eAAhB,CAAkC,aAA7C,CAA4D,GAA5D,CAR6B,CAA7B,CAUA,kCAAoC,CAClCrB,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACIyJ,IAAM+D,KAAK/D,GADf,CAEIsV,UAAYvR,KAAKuR,SAFrB,CAIA,kBAAoB,MAApB;;;AAIAe,cAAgBd,mBAAmBhf,CAAnB,CAAsB+f,wBAAtB,CAAgDhB,SAAhD,CAA2D,KAA3D,CAAhB,CACA,GAAIe,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP;;AAInBA,cAAgBZ,wBAAwBlf,CAAxB,CAA2BggB,wBAA3B,CAAhB,CACA,GAAIF,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP;AAGnBA,cAAgBG,iBAAexW,GAAf,CAAoByW,sBAApB,CAAhB,CACA,GAAIJ,aAAJ,CAAmB,0BAA0BA,aAAnB,CAAP,CAEnB,WAAA,CACD,CAvBiC,CAApC;;;;;;;;;;;;;;AA2CA,wBAA0B;AAExBtB,QAAS,gBAAA,EAAmB,CAC1B,WAAA,CACD,CAJuB,CAA1B;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAsCA,6BAA+B,CAAC,UAAD,CAAa,eAAb,CAA8B,WAA9B,CAA/B,CAEA,6BAA+B,CAAC,qBAAD,CAA/B,CAEA,kCAAoC,CAAC,QAAD,CAAW,YAAX,CAAyB,OAAzB,CAAkC,OAAlC,CAA2C,UAA3C,CAApC,CACA,qCAAuC,UAAA,CAAW2B,8BAA8BvhB,IAA9B,CAAmC,GAAnC,CAAX,CAAoD,GAApD,CAAvC,CAEA,kCAAoC,CAAC,QAAD,CAAW,QAAX,CAAqB,OAArB,CAA8B,UAA9B,CAA0C,UAA1C,CAAsD,MAAtD,CAA8D,IAA9D,CAAoE,YAApE,CAAkF,MAAlF,CAA0F,QAA1F,CAAoG,QAApG,CAA8G,KAA9G,CAAqH,QAArH,CAA+H,SAA/H,CAA0I,QAA1I,CAAoJ,SAApJ,CAA+J,SAA/J,CAA0K,QAA1K,CAAoL,OAApL,CAA6L,UAA7L,CAAyM,SAAzM,CAAoN,OAApN,CAA6N,OAA7N,CAAsO,KAAtO,CAA6O,aAA7O,CAApC,CACA,qCAAuC,UAAA,CAAWwhB,8BAA8BxhB,IAA9B,CAAmC,GAAnC,CAAX,CAAoD,GAApD,CAAvC,CAEA,WAAa,gBAAb,CACA,WAAa,kBAAb,CAEA,eAAA,CAAgBmC,KAAhB,CAAuB,CACrB,MAAO,CAACA,MAAM8B,IAAN,CAAW,OAAX,GAA
uB,EAAxB,EAA8B,GAA9B,EAAqC9B,MAAM8B,IAAN,CAAW,IAAX,GAAoB,EAAzD,CAAP,CACD;AAGD,sBAAA,CAAuB4G,GAAvB,CAA4B,CAC1BA,IAAMA,IAAI3D,IAAJ,EAAN,CACA,UAAY,CAAZ,CAEA,GAAIua,iCAAiClf,IAAjC,CAAsCsI,GAAtC,CAAJ,CAAgD,CAC9CzE,OAAS,EAAT,CACD,CAED,GAAIsb,iCAAiCnf,IAAjC,CAAsCsI,GAAtC,CAAJ,CAAgD,CAC9CzE,OAAS,EAAT,CACD;;AAID,GAAIub,OAAOpf,IAAP,CAAYsI,GAAZ,CAAJ,CAAsB,CACpBzE,OAAS,EAAT,CACD,CAED,GAAIwb,OAAOrf,IAAP,CAAYsI,GAAZ,CAAJ,CAAsB,CACpBzE,OAAS,EAAT,CACD;AAID,YAAA,CACD;AAGD,kBAAA,CAAmBtC,IAAnB,CAAyB,CACvB,GAAIA,KAAKG,IAAL,CAAU,KAAV,CAAJ,CAAsB,CACpB,QAAA,CACD,CAED,QAAA,CACD;;AAID,uBAAA,CAAwBH,IAAxB,CAA8B,CAC5B,UAAY,CAAZ,CACA,eAAiBA,KAAKX,OAAL,CAAa,QAAb,EAAuByG,KAAvB,EAAjB,CAEA,GAAIiY,WAAW9e,MAAX,GAAsB,CAA1B,CAA6B,CAC3BqD,OAAS,EAAT,CACD,CAED,YAActC,KAAK4D,MAAL,EAAd,CACA,aAAe,MAAf,CACA,GAAIK,QAAQhF,MAAR,GAAmB,CAAvB,CAA0B,CACxB+e,SAAW/Z,QAAQL,MAAR,EAAX,CACD,CAED,CAACK,OAAD,CAAU+Z,QAAV,EAAoB5W,OAApB,CAA4B,SAAU/I,KAAV,CAAiB,CAC3C,GAAI0Q,mBAAiBtQ,IAAjB,CAAsBwf,OAAO5f,KAAP,CAAtB,CAAJ,CAA0C,CACxCiE,OAAS,EAAT,CACD,CACF,CAJD,EAMA,YAAA,CACD;;AAID,uBAAA,CAAwBtC,IAAxB,CAA8B,CAC5B,UAAY,CAAZ,CACA,aAAeA,KAAKnC,IAAL,EAAf,CACA,YAAcwH,SAASvH,GAAT,CAAa,CAAb,CAAd,CAEA,GAAIQ,SAAWA,QAAQP,OAAR,GAAoB,YAAnC,CAAiD,CAC/CuE,OAAS,EAAT,CACD,CAED,GAAIyM,mBAAiBtQ,IAAjB,CAAsBwf,OAAO5Y,QAAP,CAAtB,CAAJ,CAA6C,CAC3C/C,OAAS,EAAT,CACD,CAED,YAAA,CACD,CAED,0BAAA,CAA2BtC,IAA3B,CAAiC,CAC/B,UAAY,CAAZ,CAEA,UAAYwC,WAAWxC,KAAKG,IAAL,CAAU,OAAV,CAAX,CAAZ,CACA,WAAaqC,WAAWxC,KAAKG,IAAL,CAAU,QAAV,CAAX,CAAb,CACA,QAAUH,KAAKG,IAAL,CAAU,KAAV,CAAV;AAGA,GAAIC,OAASA,OAAS,EAAtB,CAA0B,CACxBkC,OAAS,EAAT,CACD;AAGD,GAAIrC,QAAUA,QAAU,EAAxB,CAA4B,CAC1BqC,OAAS,EAAT,CACD,CAED,GAAIlC,OAASH,MAAT,EAAmB,CAACkV,IAAIjN,QAAJ,CAAa,QAAb,CAAxB,CAAgD,CAC9C,SAAW9H,MAAQH,MAAnB,CACA,GAAIie,KAAO,IAAX,CAAiB;AAEf5b,OAAS,GAAT,CACD,CAHD,IAGO,CACLA,OAASU,KAAKmb,KAAL,CAAWD,KAAO,IAAlB,CAAT,CACD,CACF,CAED,YAAA,CACD,CAED,wBAAA,CAAyBE,KAAzB,CAAgC3gB,KAAhC,CAAuC,CACrC,aAAawB,MAAN,CAAe,CAAf,CAAmBxB,KAA1B,CACD;;;;;;;;AAUD,iCAAmC,CACjCqe,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,K
AAKxN,CAAb,CACI4I,QAAU4E,KAAK5E,OADnB,CAEImW,UAAYvR,KAAKuR,SAFrB,CAGI1c,KAAOmL,KAAKnL,IAHhB,CAKA,aAAe,MAAf,CACA,GAAI,CAACrC,EAAEsC,OAAH,EAActC,EAAE,MAAF,EAAU2B,MAAV,GAAqB,CAAvC,CAA0C,CACxC3B,EAAE,GAAF,EAAOwI,KAAP,GAAeiP,OAAf,CAAuBpV,IAAvB,EACD;;;;AAMD,aAAe2c,mBAAmBhf,CAAnB,CAAsB+gB,wBAAtB,CAAgDhC,SAAhD,CAA2D,KAA3D,CAAf,CAEA,GAAIiC,QAAJ,CAAc,CACZC,SAAWpD,QAAQmD,QAAR,CAAX,CAEA,GAAIC,QAAJ,CAAc,eAAA,CACf;;;AAKD,aAAejhB,EAAE4I,OAAF,CAAf,CACA,SAAW5I,EAAE,KAAF,CAASuJ,QAAT,EAAmBgB,OAAnB,EAAX,CACA,cAAgB,EAAhB,CAEA2W,KAAKpX,OAAL,CAAa,SAAU0G,GAAV,CAAerQ,KAAf,CAAsB,CACjC,SAAWH,EAAEwQ,GAAF,CAAX,CACA,QAAU9N,KAAKG,IAAL,CAAU,KAAV,CAAV,CAEA,GAAI,CAACgV,GAAL,CAAU,OAEV,UAAYsJ,cAActJ,GAAd,CAAZ,CACA7S,OAASoc,UAAU1e,IAAV,CAAT,CACAsC,OAASqc,eAAe3e,IAAf,CAAT,CACAsC,OAASsc,eAAe5e,IAAf,CAAT,CACAsC,OAASuc,kBAAkB7e,IAAlB,CAAT,CACAsC,OAASwc,gBAAgBN,IAAhB,CAAsB/gB,KAAtB,CAAT,CAEAshB,UAAU5J,GAAV,EAAiB7S,KAAjB,CACD,CAdD,EAgBA,0BAA4BuL,mBAAiBkR,SAAjB,EAA4Bhe,MAA5B,CAAmC,SAAUC,GAAV,CAAetB,GAAf,CAAoB,CACjF,iBAAiBA,GAAV,EAAiBsB,IAAI,CAAJ,CAAjB,CAA0B,CAACtB,GAAD,CAAMqf,UAAUrf,GAAV,CAAN,CAA1B,CAAkDsB,GAAzD,CACD,CAF2B,CAEzB,CAAC,IAAD,CAAO,CAAP,CAFyB,CAA5B,CAGIiZ,uBAAyBtK,iBAAeuK,qBAAf,CAAsC,CAAtC,CAH7B,CAII8E,OAAS/E,uBAAuB,CAAvB,CAJb,CAKI/U,SAAW+U,uBAAuB,CAAvB,CALf,CAOA,GAAI/U,SAAW,CAAf,CAAkB,CAChBqZ,SAAWpD,QAAQ6D,MAAR,CAAX,CAEA,GAAIT,QAAJ,CAAc,eAAA,CACf;;AAID,8BAAgC,IAAhC,CACA,sBAAwB,KAAxB,CACA,mBAAqBpW,SAArB,CAEA,GAAI,CACF,IAAK,cAAgB+J,eAAa+M,wBAAb,CAAhB,CAAwDtN,KAA7D,CAAoE,EAAES,0BAA4B,CAACT,MAAQU,UAAUxU,IAAV,EAAT,EAA2ByU,IAAzD,CAApE,CAAoIF,0BAA4B,IAAhK,CAAsK,CACpK,aAAeT,MAAMpJ,KAArB,CAEA,UAAYjL,EAAEtB,QAAF,EAAY8J,KAAZ,EAAZ,CACA,QAAUzH,MAAM8B,IAAN,CAAW,KAAX,CAAV,CACA,GAAIgV,GAAJ,CAAS,CACPoJ,SAAWpD,QAAQhG,GAAR,CAAX,CACA,GAAIoJ,QAAJ,CAAc,eAAA,CACf,CAED,SAAWlgB,MAAM8B,IAAN,CAAW,MAAX,CAAX,CACA,GAAI+e,IAAJ,CAAU,CACRX,SAAWpD,QAAQ+D,IAAR,CAAX,CACA,GAAIX,QAAJ,CAAc,eAAA,CACf,CAED,UAAYlgB,MAAM8B,IAAN,CAAW,OAAX,CAAZ,CACA,GAAIoI,KAAJ,CAAW,CACTgW,SAAWpD,QAAQ5S,KAAR,CAAX,CACA,GAAIgW,QAAJ,CAAc,eAAA,CACf,CACF,CACF,CA
AC,MAAOhT,GAAP,CAAY,CACZmH,kBAAoB,IAApB,CACAC,eAAiBpH,GAAjB,CACD,CA1BD,OA0BU,CACR,GAAI,CACF,GAAI,CAAC6G,yBAAD,EAA8BC,UAAUO,MAA5C,CAAoD,CAClDP,UAAUO,MAAV,GACD,CACF,CAJD,OAIU,CACR,GAAIF,iBAAJ,CAAuB,CACrB,oBAAA,CACD,CACF,CACF,CAED,WAAA,CACD,CAzGgC,CAAnC;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAyQA,wBAAA,CAAyBpQ,KAAzB,CAAgC6c,UAAhC,CAA4CD,IAA5C,CAAkD;;;;;AAMhD,GAAI5c,MAAQ,CAAZ,CAAe,CACb,eAAiB,cAAY8c,eAAZ,CAA4B,IAA5B,CAAkCD,UAAlC,CAA8CD,IAA9C,EAAoDG,KAApD,EAAjB;;;;;;AAOA,gBAAkB,IAAMC,UAAxB,CACA,iBAAmB,EAAE,KAAOC,YAAc,GAArB,CAAF,CAAnB,CACA,aAAeC,YAAf,CACD,CAED,QAAA,CACD,CAED,sBAAA,CAAuBlY,QAAvB,CAAiC8I,OAAjC,CAA0C;;;;AAKxC,UAAY,CAAZ,CAEA,GAAI3L,cAAYhG,IAAZ,CAAiB6I,SAASlE,IAAT,EAAjB,CAAJ,CAAuC,CACrC,kBAAoBlD,SAASoH,QAAT,CAAmB,EAAnB,CAApB;;;AAIA,GAAImY,cAAgB,CAApB,CAAuB,CACrBnd,MAAQ,CAAC,EAAT,CACD,CAFD,IAEO,CACLA,MAAQU,KAAKE,GAAL,CAAS,CAAT,CAAY,GAAKuc,aAAjB,CAAR,CACD;;;AAKD,GAAIrP,SAAWA,SAAWqP,aAA1B,CAAyC,CACvCnd,OAAS,EAAT,CACD,CACF,CAED,YAAA,CACD,CAED,wBAAA,CAAyB8N,OAAzB,CAAkCsP,IAAlC,CAAwC;;;AAItC,GAAItP,SAAW,CAACsP,IAAhB,CAAsB,CACpB,SAAA,CACD,CAED,QAAA,CACD,CAED,eAAiB,IAAjB;;AAIA,4BAA8B,CAAC,OAAD,CAAU,SAAV,CAAqB,SAArB,CAAgC,SAAhC,CAA2C,QAA3C,CAAqD,OAArD,CAA8D,OAA9D,CAAuE,OAAvE,CAAgF,KAAhF,CAAuF,OAAvF,CAAgG,MAAhG,CAAwG,QAAxG,CAAkH,KAAlH,CAAyH,iBAAzH,CAA9B,CACA,+BAAiC,UAAA,CAAWC,wBAAwBzjB,IAAxB,CAA6B,GAA7B,CAAX,CAA8C,GAA9C,CAAjC;;;AAKA,wBAA0B,UAAA,CAAW,4CAAX,CAAyD,GAAzD,CAA1B;;AAIA,uBAAyB,UAAA,CAAW,kBAAX,CAA+B,GAA/B,CAAzB;;AAIA,wBAA0B,UAAA,CAAW,yBAAX,CAAsC,GAAtC,CAA1B;AAIA,6BAAA,CAA8BgjB,IAA9B,CAAoC;AAElC,GAAIU,2BAA2BnhB,IAA3B,CAAgCygB,IAAhC,CAAJ,CAA2C,CACzC,MAAO,CAAC,EAAR,CACD,CAED,QAAA,CACD,CAED,kBAAA,CAAmBW,KAAnB,CAA0B,CACxB,MAAO,CAACA,MAAM1f,IAAN,CAAW,OAAX,GAAuB,EAAxB,EAA8B,GAA9B,EAAqC0f,MAAM1f,IAAN,CAAW,IAAX,GAAoB,EAAzD,CAAP,CACD,CAED,yBAAA,CAA0B0f,KAA1B,CAAiC;;;AAI/B,YAAcA,MAAMjc,MAAN,EAAd,CACA,kBAAoB,KAApB,CACA,kBAAoB,KAApB,CACA,UAAY,CAAZ,CAEAkc,YAAY1V,M
AAM,CAAN,CAAS,CAAT,CAAZ,EAAyBhD,OAAzB,CAAiC,UAAY,CAC3C,GAAInD,QAAQhF,MAAR,GAAmB,CAAvB,CAA0B,CACxB,OACD,CAED,eAAiB8gB,UAAU9b,OAAV,CAAmB,GAAnB,CAAjB;;AAIA,GAAI,CAAC+b,aAAD,EAAkBC,UAAQxhB,IAAR,CAAayhB,UAAb,CAAtB,CAAgD,CAC9CF,cAAgB,IAAhB,CACA1d,OAAS,EAAT,CACD;;;AAKD,GAAI,CAAC6d,aAAD,EAAkBze,oBAAkBjD,IAAlB,CAAuByhB,UAAvB,CAAlB,EAAwDN,2BAA2BnhB,IAA3B,CAAgCyhB,UAAhC,CAA5D,CAAyG,CACvG,GAAI,CAAC3e,oBAAkB9C,IAAlB,CAAuByhB,UAAvB,CAAL,CAAyC,CACvCC,cAAgB,IAAhB,CACA7d,OAAS,EAAT,CACD,CACF,CAED2B,QAAUA,QAAQL,MAAR,EAAV,CACD,CAzBD,EA2BA,YAAA,CACD,CAED,sBAAA,CAAuBwc,QAAvB,CAAiC;;AAG/B,GAAIC,oBAAoB5hB,IAApB,CAAyB2hB,QAAzB,CAAJ,CAAwC,CACtC,MAAO,CAAC,GAAR,CACD,CAED,QAAA,CACD,CAED,oBAAA,CAAqBlB,IAArB,CAA2BC,UAA3B,CAAuCmB,OAAvC,CAAgDnU,SAAhD,CAA2D7E,QAA3D,CAAqEiZ,YAArE,CAAmF;AAEjF,GAAIA,aAAazf,IAAb,CAAkB,SAAUiG,GAAV,CAAe,CACnC,cAAgBA,GAAhB,CACD,CAFG,IAEGoB,SAFP,CAEkB,CAChB,YAAA,CACD;;AAID,GAAI,CAAC+W,IAAD,EAASA,OAASC,UAAlB,EAAgCD,OAASoB,OAA7C,CAAsD,CACpD,YAAA,CACD,CAED,aAAenU,UAAUpB,QAAzB,CAEA,eAAiB9D,MAAIoF,KAAJ,CAAU6S,IAAV,CAAjB,CACIsB,SAAWxS,WAAWjD,QAD1B;AAMA,GAAIyV,WAAazV,QAAjB,CAA2B,CACzB,YAAA,CACD;;AAID,aAAemU,KAAK7a,OAAL,CAAaic,OAAb,CAAsB,EAAtB,CAAf,CACA,GAAI,CAACG,WAAWhiB,IAAX,CAAgBiiB,QAAhB,CAAL,CAAgC,CAC9B,YAAA,CACD;;AAID,GAAId,2BAA2BnhB,IAA3B,CAAgC6I,QAAhC,CAAJ,CAA+C,CAC7C,YAAA,CACD;AAGD,GAAIA,SAASrI,MAAT,CAAkB,EAAtB,CAA0B,CACxB,YAAA,CACD,CAED,WAAA,CACD,CAED,qBAAA,CAAsBigB,IAAtB,CAA4ByB,SAA5B,CAAuC;;;;AAKrC,GAAI,CAACA,UAAUliB,IAAV,CAAeygB,IAAf,CAAL,CAA2B,CACzB,MAAO,CAAC,EAAR,CACD,CAED,QAAA,CACD,CAED,0BAAA,CAA2BkB,QAA3B,CAAqC;AAEnC,GAAIQ,oBAAoBniB,IAApB,CAAyB2hB,QAAzB,CAAJ,CAAwC,CACtC,SAAA,CACD,CAED,QAAA,CACD,CAED,sBAAA,CAAuBA,QAAvB,CAAiC;AAE/B,GAAIS,mBAAmBpiB,IAAnB,CAAwB2hB,QAAxB,CAAJ,CAAuC;;;;AAKrC,GAAIQ,oBAAoBniB,IAApB,CAAyB2hB,QAAzB,CAAJ,CAAwC,CACtC,MAAO,CAAC,EAAR,CACD,CACF,CAED,QAAA,CACD,CAED,sBAAA,CAAuBE,OAAvB,CAAgC,CAC9B,iBAAO,CAAW,IAAMA,OAAjB,CAA0B,GAA1B,CAAP,CACD,CAED,gBAAA,CAAiBT,KAAjB,CAAwBvY,QAAxB,CAAkC,CAChC,MAAO,CAACA,UAAYuY,MAAMhgB,IAAN,EAAb,EAA6B,GAA7B,EAAoCggB,MA
AM1f,IAAN,CAAW,OAAX,GAAuB,EAA3D,EAAiE,GAAjE,EAAwE0f,MAAM1f,IAAN,CAAW,IAAX,GAAoB,EAA5F,CAAP,CACD,CAED,mBAAA,CAAoB2K,IAApB,CAA0B,CACxB,UAAYA,KAAKgW,KAAjB,CACI3B,WAAarU,KAAKqU,UADtB,CAEImB,QAAUxV,KAAKwV,OAFnB,CAGInU,UAAYrB,KAAKqB,SAHrB,CAII7O,EAAIwN,KAAKxN,CAJb,CAKIyjB,kBAAoBjW,KAAKyV,YAL7B,CAMIA,aAAeQ,oBAAsB5Y,SAAtB,CAAkC,EAAlC,CAAuC4Y,iBAN1D,CAQA5U,UAAYA,WAAalF,MAAIoF,KAAJ,CAAU8S,UAAV,CAAzB,CACA,cAAgB6B,cAAcV,OAAd,CAAhB,CACA,SAAWW,cAAY3jB,CAAZ,CAAX;;;;;;;AASA,gBAAkBwjB,MAAM/f,MAAN,CAAa,SAAUmgB,aAAV,CAAyBC,IAAzB,CAA+B;;;AAI5D,UAAY3hB,WAAS2hB,IAAT,CAAZ,CACA,SAAWC,eAAa7hB,MAAM2f,IAAnB,CAAX,CACA,UAAY5hB,EAAE6jB,IAAF,CAAZ,CACA,aAAetB,MAAMhgB,IAAN,EAAf,CAEA,GAAI,CAACwhB,YAAYnC,IAAZ,CAAkBC,UAAlB,CAA8BmB,OAA9B,CAAuCnU,SAAvC,CAAkD7E,QAAlD,CAA4DiZ,YAA5D,CAAL,CAAgF,CAC9E,oBAAA,CACD;AAGD,GAAI,CAACW,cAAchC,IAAd,CAAL,CAA0B,CACxBgC,cAAchC,IAAd,EAAsB,CACpB5c,MAAO,CADa,CAEpBgF,SAAUA,QAFU,CAGpB4X,KAAMA,IAHc,CAAtB,CAKD,CAND,IAMO,CACLgC,cAAchC,IAAd,EAAoB5X,QAApB,CAA+B4Z,cAAchC,IAAd,EAAoB5X,QAApB,CAA+B,GAA/B,CAAqCA,QAApE,CACD,CAED,iBAAmB4Z,cAAchC,IAAd,CAAnB,CACA,aAAeoC,QAAQzB,KAAR,CAAevY,QAAf,CAAf,CACA,YAAcia,iBAAerC,IAAf,CAAd,CAEA,UAAYsC,aAAatC,IAAb,CAAmByB,SAAnB,CAAZ,CACAre,OAASmf,kBAAkBrB,QAAlB,CAAT,CACA9d,OAASof,cAActB,QAAd,CAAT,CACA9d,OAASqf,cAAcvB,QAAd,CAAT,CACA9d,OAASsf,iBAAiB/B,KAAjB,CAAT,CACAvd,OAASuf,qBAAqB3C,IAArB,CAAT,CACA5c,OAASwf,gBAAgB1R,OAAhB,CAAyBsP,IAAzB,CAAT,CACApd,OAASyf,cAAcza,QAAd,CAAwB8I,OAAxB,CAAT,CACA9N,OAAS0f,gBAAgB1f,KAAhB,CAAuB6c,UAAvB,CAAmCD,IAAnC,CAAT,CAEA+C,aAAa3f,KAAb,CAAqBA,KAArB,CAEA,oBAAA,CACD,CAzCiB,CAyCf,EAzCe,CAAlB,CA2CA,0BAAwB4f,WAAjB,EAA8BjjB,MAA9B,GAAyC,CAAzC,CAA6C,IAA7C,CAAoDijB,WAA3D,CACD;;AAKD,gCAAkC,CAChCpG,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACIyJ,IAAM+D,KAAK/D,GADf,CAEIoF,UAAYrB,KAAKqB,SAFrB,CAGI4U,kBAAoBjW,KAAKyV,YAH7B,CAIIA,aAAeQ,oBAAsB5Y,SAAtB,CAAkC,EAAlC,CAAuC4Y,iBAJ1D,CAMA5U,UAAYA,WAAalF,MAAIoF,KAAJ,CAAUtF,GAAV,CAAzB,CAEA,eAAiBqa,eAAara,GAAb,CAAjB,CACA,YAAcob,iBAAepb,GAAf,CAAoBoF,SAApB,CAAd,CAEA,UAAY7O,EAAE,SAAF,EAAa
uK,OAAb,EAAZ,CAEA,gBAAkBua,WAAW,CAC3BtB,MAAOA,KADoB,CAE3B3B,WAAYA,UAFe,CAG3BmB,QAASA,OAHkB,CAI3BnU,UAAWA,SAJgB,CAK3B7O,EAAGA,CALwB,CAM3BijB,aAAcA,YANa,CAAX,CAAlB;AAUA,GAAI,CAAC8B,WAAL,CAAkB,WAAA;;AAIlB,YAAcxU,mBAAiBwU,WAAjB,EAA8BthB,MAA9B,CAAqC,SAAUC,GAAV,CAAemgB,IAAf,CAAqB,CACtE,eAAiBkB,YAAYlB,IAAZ,CAAjB,CACA,kBAAkB7e,KAAX,CAAmBtB,IAAIsB,KAAvB,CAA+BggB,UAA/B,CAA4CthB,GAAnD,CACD,CAHa,CAGX,CAAEsB,MAAO,CAAC,GAAV,CAHW,CAAd;;AAOA,GAAIigB,QAAQjgB,KAAR,EAAiB,EAArB,CAAyB,CACvB,eAAe4c,IAAf,CACD,CAED,WAAA,CACD,CAzC+B,CAAlC,CA4CA,6BAA+B,CAAC,QAAD,CAA/B,CAEA,oBAAA,CAAqBnY,GAArB,CAA0B,CACxB,cAAgBE,MAAIoF,KAAJ,CAAUtF,GAAV,CAAhB,CACA,aAAeoF,UAAUpB,QAAzB,CAEA,eAAA,CACD,CAED,eAAA,CAAgBhE,GAAhB,CAAqB,CACnB,MAAO,CACLA,IAAKA,GADA,CAELoN,OAAQqO,YAAYzb,GAAZ,CAFH,CAAP,CAID,CAED,wBAA0B,CACxB+U,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACIyJ,IAAM+D,KAAK/D,GADf,CAEIsV,UAAYvR,KAAKuR,SAFrB,CAIA,eAAiB/e,EAAE,qBAAF,CAAjB,CACA,GAAImlB,WAAWxjB,MAAX,GAAsB,CAA1B,CAA6B,CAC3B,SAAWwjB,WAAWtiB,IAAX,CAAgB,MAAhB,CAAX,CACA,GAAI+e,IAAJ,CAAU,CACR,cAAcA,IAAP,CAAP,CACD,CACF,CAED,YAAc5C,mBAAmBhf,CAAnB,CAAsBolB,wBAAtB,CAAgDrG,SAAhD,CAAd,CACA,GAAIsG,OAAJ,CAAa,CACX,cAAcA,OAAP,CAAP,CACD,CAED,cAAc5b,GAAP,CAAP,CACD,CApBuB,CAA1B,CAuBA,2BAA6B,CAAC,gBAAD,CAAmB,qBAAnB,CAA7B,CAEA,gBAAA,CAAiBb,OAAjB,CAA0B5I,CAA1B,CAA6B,CAC3B,cAAgBiN,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,GAApF,CAEArE,QAAUA,QAAQ7B,OAAR,CAAgB,UAAhB,CAA4B,GAA5B,EAAiCjB,IAAjC,EAAV,CACA,mBAAiB8C,OAAV,CAAmB0c,SAAnB,CAA8B,CAAEC,QAAS,UAAX,CAA9B,CAAP,CACD,CAED,4BAA8B,CAC5B/G,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,MAAQA,KAAKxN,CAAb,CACI4I,QAAU4E,KAAK5E,OADnB,CAEImW,UAAYvR,KAAKuR,SAFrB,CAIA,YAAcC,mBAAmBhf,CAAnB,CAAsBwlB,sBAAtB,CAA8CzG,SAA9C,CAAd,CACA,GAAI/G,OAAJ,CAAa,CACX,eAAe3N,YAAU2N,OAAV,CAAmBhY,CAAnB,CAAR,CAAP,CACD;AAED,cAAgB,GAAhB,CACA,iBAAmB4I,QAAQ7C,KAAR,CAAc,CAAd,CAAiBuf,UAAY,CAA7B,CAAnB,CACA,eAAetlB,EAAEylB,YAAF,EAAgBljB,IAAhB,EAAR,CAAgCvC,CAAhC,CAAmCslB,SAAnC,CAAP,CACD,CAd2B,CAA9B,CAiBA,8BAAgC,CAC9
B9G,QAAS,gBAAA,CAAiBhR,IAAjB,CAAuB,CAC9B,YAAcA,KAAK5E,OAAnB,CAEA,MAAQyN,UAAQC,IAAR,CAAa1N,OAAb,CAAR,CACA,aAAe5I,EAAE,KAAF,EAASwI,KAAT,EAAf,CAEA,SAAW1B,kBAAgByC,SAAShH,IAAT,EAAhB,CAAX,CACA,YAAYwQ,KAAL,CAAW,IAAX,EAAiBpR,MAAxB,CACD,CAT6B,CAAhC,CAYA,qBAAuB;AAErBkV,OAAQ,GAFa,CAGrB7C,MAAO0R,sBAAsBlH,OAHR,CAIrBrH,eAAgBwO,8BAA8BnH,OAJzB,CAKrBtH,OAAQ0O,uBAAuBpH,OALV,CAMrB5V,QAASid,wBAAwBrH,OAAxB,CAAgCsH,IAAhC,CAAqCD,uBAArC,CANY,CAOrB/N,eAAgBiO,6BAA6BvH,OAPxB,CAQrBjH,IAAKyO,oBAAoBxH,OARJ,CASrBzG,cAAekO,4BAA4BzH,OATtB,CAUrB0H,eAAgBC,oBAAoB3H,OAVf,CAWrBxG,QAASoO,wBAAwB5H,OAXZ,CAYrB6H,WAAYC,0BAA0B9H,OAZjB,CAarB+H,UAAW,kBAAA,CAAmB/Y,IAAnB,CAAyB,CAClC,UAAYA,KAAKwG,KAAjB,CACA,yBAAuBwS,YAAhB,CAA6BxS,KAA7B,CAAP,CACD,CAhBoB,CAkBrBwK,QAAS,gBAAA,CAAiB1Q,OAAjB,CAA0B,CACjC,SAAWA,QAAQzL,IAAnB,CACIoc,WAAa3Q,QAAQuI,OADzB,CAEIrW,EAAI8N,QAAQ9N,CAFhB,CAKA,GAAIqC,MAAQ,CAACrC,CAAb,CAAgB,CACd,WAAaye,WAAWnI,IAAX,CAAgBjU,IAAhB,CAAb,CACAyL,QAAQ9N,CAAR,CAAYymB,MAAZ,CACD,CAED,UAAY,KAAKzS,KAAL,CAAWlG,OAAX,CAAZ,CACA,mBAAqB,KAAKqJ,cAAL,CAAoBrJ,OAApB,CAArB,CACA,WAAa,KAAKoJ,MAAL,CAAYpJ,OAAZ,CAAb,CACA,YAAc,KAAKlF,OAAL,CAAaqG,WAAS,EAAT,CAAanB,OAAb,CAAsB,CAAEkG,MAAOA,KAAT,CAAtB,CAAb,CAAd,CACA,mBAAqB,KAAK8D,cAAL,CAAoB7I,WAAS,EAAT,CAAanB,OAAb,CAAsB,CAAElF,QAASA,OAAX,CAAtB,CAApB,CAArB,CACA,QAAU,KAAK2O,GAAL,CAAStI,WAAS,EAAT,CAAanB,OAAb,CAAsB,CAAElF,QAASA,OAAX,CAAtB,CAAT,CAAV,CACA,kBAAoB,KAAKmP,aAAL,CAAmBjK,OAAnB,CAApB,CACA,YAAc,KAAKkK,OAAL,CAAa/I,WAAS,EAAT,CAAanB,OAAb,CAAsB,CAAElF,QAASA,OAAX,CAAtB,CAAb,CAAd,CACA,eAAiB,KAAKyd,UAAL,CAAgBpX,WAAS,EAAT,CAAanB,OAAb,CAAsB,CAAElF,QAASA,OAAX,CAAtB,CAAhB,CAAjB,CACA,cAAgB,KAAK2d,SAAL,CAAe,CAAEvS,MAAOA,KAAT,CAAf,CAAhB,CAEA,oBAAsB,KAAKkS,cAAL,CAAoBpY,OAApB,CAAtB,CACIrE,IAAMid,gBAAgBjd,GAD1B,CAEIoN,OAAS6P,gBAAgB7P,MAF7B,CAIA,MAAO,CACL7C,MAAOA,KADF,CAELkD,OAAQA,MAFH,CAGLC,eAAgBA,gBAAkB,IAH7B,CAILI,IAAKA,GAJA,CAKLO,eAAgBA,cALX,CAMLlP,QAASA,OANJ,CAOLmP,cAAeA,aAPV,CAQLtO,IAAKA,GARA,CASLoN,OAAQA,MATH,CAULmB,QAASA,OAVJ,CAWLqO,WAAYA,UAXP,CAYLE,UAAWA,SAZN,CAAP,CAcD,CA1DoB,CAAvB,CA6DA,q
BAAA,CAAsB9c,GAAtB,CAA2BoF,SAA3B,CAAsC,CACpCA,UAAYA,WAAalF,MAAIoF,KAAJ,CAAUtF,GAAV,CAAzB,CACA,eAAiBoF,SAAjB,CACIpB,SAAWkZ,WAAWlZ,QAD1B,CAGA,eAAiBA,SAASsF,KAAT,CAAe,GAAf,EAAoBhN,KAApB,CAA0B,CAAC,CAA3B,EAA8BnH,IAA9B,CAAmC,GAAnC,CAAjB,CAEA,kBAAkB6O,QAAX,GAAwBmZ,WAAWC,UAAX,CAAxB,EAAkDC,gBAAzD,CACD;AAID,yBAAA,CAA0Bvd,QAA1B,CAAoCvJ,CAApC,CAAuCwN,IAAvC,CAA6C,CAC3C,UAAYA,KAAKkJ,KAAjB,CAEA,GAAI,CAACA,KAAL,CAAY,eAAA,CAEZ1W,EAAE0W,MAAM9X,IAAN,CAAW,GAAX,CAAF,CAAmB2K,QAAnB,EAA6B5I,MAA7B,GAEA,eAAA,CACD;AAGD,0BAAA,CAA2B4I,QAA3B,CAAqCvJ,CAArC,CAAwCuS,KAAxC,CAA+C,CAC7C,eAAiBA,MAAMyE,UAAvB,CAEA,GAAI,CAACA,UAAL,CAAiB,eAAA,CAEjBzG,mBAAiByG,UAAjB,EAA6BlN,OAA7B,CAAqC,SAAU1H,GAAV,CAAe,CAClD,aAAepC,EAAEoC,GAAF,CAAOmH,QAAP,CAAf,CACA,UAAYyN,WAAW5U,GAAX,CAAZ;AAGA,GAAI,YAAA,GAAiB,QAArB,CAA+B,CAC7B2kB,SAAS7mB,IAAT,CAAc,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CACnCwP,iBAAiBrQ,EAAEa,IAAF,CAAjB,CAA0Bb,CAA1B,CAA6BgX,WAAW5U,GAAX,CAA7B,EACD,CAFD,EAGD,CAJD,QAIW,YAAA,GAAiB,UAArB,CAAiC;AAEtC2kB,SAAS7mB,IAAT,CAAc,SAAUC,KAAV,CAAiBU,IAAjB,CAAuB,CACnC,WAAaoK,MAAMjL,EAAEa,IAAF,CAAN,CAAeb,CAAf,CAAb;AAEA,GAAI,aAAA,GAAkB,QAAtB,CAAgC,CAC9BqQ,iBAAiBrQ,EAAEa,IAAF,CAAjB,CAA0Bb,CAA1B,CAA6BkW,MAA7B,EACD,CACF,CAND,EAOD,CACF,CAnBD,EAqBA,eAAA,CACD,CAED,6BAAA,CAA8BlW,CAA9B,CAAiCwV,SAAjC,CAA4C,CAC1C,iBAAiBhS,IAAV,CAAe,SAAU9E,QAAV,CAAoB,CACxC,GAAIsoB,MAAMC,OAAN,CAAcvoB,QAAd,CAAJ,CAA6B,CAC3B,cAAgB2T,iBAAe3T,QAAf,CAAyB,CAAzB,CAAhB,CACIkZ,EAAIsP,UAAU,CAAV,CADR,CAEIrkB,KAAOqkB,UAAU,CAAV,CAFX,CAIA,SAAStP,CAAF,EAAKjW,MAAL,GAAgB,CAAhB,EAAqB3B,EAAE4X,CAAF,EAAK/U,IAAL,CAAUA,IAAV,CAArB,EAAwC7C,EAAE4X,CAAF,EAAK/U,IAAL,CAAUA,IAAV,EAAgBiD,IAAhB,KAA2B,EAA1E,CACD;AAGD,SAASpH,QAAF,EAAYiD,MAAZ,GAAuB,CAAvB,EAA4B3B,EAAEtB,QAAF,EAAY6D,IAAZ,GAAmBuD,IAAnB,KAA8B,EAAjE,CACD,CAXM,CAAP,CAYD,CAED,eAAA,CAAgBoY,IAAhB,CAAsB,CACpB,MAAQA,KAAKle,CAAb,CACIsU,KAAO4J,KAAK5J,IADhB,CAEI6S,eAAiBjJ,KAAKiJ,cAF1B,CAGIC,kBAAoBlJ,KAAKmJ,WAH7B,CAIIA,YAAcD,oBAAsBvc,SAAtB,CAAkC,KAAlC,CAA0Cuc,iBAJ5D;AAOA,GAAI,CAACD,cAAL,CAAqB,WAAA;;AAIrB,GAAI,qBAAA,GAA0B,QAA9B,CAAwC,qBAAA,CAExC,c
AAgBA,eAAe3R,SAA/B,CACI8R,sBAAwBH,eAAe3P,cAD3C,CAEIA,eAAiB8P,wBAA0Bzc,SAA1B,CAAsC,IAAtC,CAA6Cyc,qBAFlE,CAKA,qBAAuBC,qBAAqBvnB,CAArB,CAAwBwV,SAAxB,CAAvB,CAEA,GAAI,CAACgS,gBAAL,CAAuB,WAAA;;;;;AAQvB,GAAIH,WAAJ,CAAiB,CACf,aAAernB,EAAEwnB,gBAAF,CAAf;AAGAje,SAAS2D,IAAT,CAAclN,EAAE,aAAF,CAAd,EACAuJ,SAAWA,SAASjD,MAAT,EAAX,CAEAiD,SAAWke,kBAAkBle,QAAlB,CAA4BvJ,CAA5B,CAA+BmnB,cAA/B,CAAX,CACA5d,SAAWme,iBAAiBne,QAAjB,CAA2BvJ,CAA3B,CAA8BmnB,cAA9B,CAAX,CAEA5d,SAAWoe,SAASrT,IAAT,EAAe/K,QAAf,CAAyB0F,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE1G,eAAgBA,cAAlB,CAAnB,CAAzB,CAAX,CAEA,SAASnV,IAAF,CAAOkH,QAAP,CAAP,CACD,CAED,WAAa,MAAb;;AAIA,GAAIyd,MAAMC,OAAN,CAAcO,gBAAd,CAAJ,CAAqC,CACnC,sBAAwBnV,iBAAemV,gBAAf,CAAiC,CAAjC,CAAxB,CACI9oB,SAAWkpB,kBAAkB,CAAlB,CADf,CAEI/kB,KAAO+kB,kBAAkB,CAAlB,CAFX,CAIA1R,OAASlW,EAAEtB,QAAF,EAAYmE,IAAZ,CAAiBA,IAAjB,EAAuBiD,IAAvB,EAAT,CACD,CAND,IAMO,CACLoQ,OAASlW,EAAEwnB,gBAAF,EAAoBjlB,IAApB,GAA2BuD,IAA3B,EAAT,CACD;;AAID,GAAI0R,cAAJ,CAAoB,CAClB,gBAAgBlD,IAAT,EAAe4B,MAAf,CAAuBgI,IAAvB,CAAP,CACD,CAED,aAAA,CACD,CAED,sBAAA,CAAuBA,IAAvB,CAA6B,CAC3B,SAAWA,KAAK5J,IAAhB,CACIqC,UAAYuH,KAAKvH,SADrB,CAEIkR,eAAiB3J,KAAK4J,QAF1B,CAGIA,SAAWD,iBAAmBhd,SAAnB,CAA+B,IAA/B,CAAsCgd,cAHrD,CAMA,WAAaE,OAAO9Y,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAEiJ,eAAgBxQ,UAAUrC,IAAV,CAAlB,CAAnB,CAAP,CAAb;AAGA,GAAI4B,MAAJ,CAAY,CACV,aAAA,CACD;;AAID,GAAI4R,QAAJ,CAAc,wBAAwBxT,IAAjB,EAAuB4J,IAAvB,CAAP,CAEd,WAAA,CACD,CAED,kBAAoB,CAClBM,QAAS,gBAAA,EAAmB,CAC1B,cAAgBvR,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE6Z,gBAApF,CACA,SAAW7Z,UAAU,CAAV,CAAX,CACA,UAAYiR,IAAZ,CACI8J,YAAcC,MAAMD,WADxB,CAEIE,eAAiBD,MAAMC,cAF3B;AAKA,GAAIvR,UAAUE,MAAV,GAAqB,GAAzB,CAA8B,iBAAiB2H,OAAV,CAAkBN,IAAlB,CAAP,CAE9BA,KAAOjP,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CACxBvH,UAAWA,SADa,CAAnB,CAAP,CAIA,GAAIqR,WAAJ,CAAiB,CACf,aAAeG,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,SAAR,CAAmB+S,YAAa,IAAhC,CAAsCrT,MAAOkU,cAA7C,CAAnB,CAAd,CAAf,CAEA,MAAO,CACLtf,QAASwf,QADJ,CAAP,CAGD,CACD,UAAYD,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J
,KAAM,OAAR,CAAnB,CAAd,CAAZ,CACA,mBAAqB6T,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,gBAAR,CAAnB,CAAd,CAArB,CACA,WAAa6T,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,QAAR,CAAnB,CAAd,CAAb,CACA,kBAAoB6T,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,eAAR,CAAnB,CAAd,CAApB,CACA,YAAc6T,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,SAAR,CAAmB+S,YAAa,IAAhC,CAAsCrT,MAAOA,KAA7C,CAAnB,CAAd,CAAd,CAEA,mBAAqBmU,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,gBAAR,CAA0B1L,QAASA,OAAnC,CAAnB,CAAd,CAArB,CACA,YAAcuf,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,SAAR,CAAmB1L,QAASA,OAA5B,CAAnB,CAAd,CAAd,CACA,QAAUuf,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,KAAR,CAAe1L,QAASA,OAAxB,CAAiCoP,QAASA,OAA1C,CAAnB,CAAd,CAAV,CACA,eAAiBmQ,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,YAAR,CAAsB1L,QAASA,OAA/B,CAAnB,CAAd,CAAjB,CACA,cAAgBuf,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,WAAR,CAAqBN,MAAOA,KAA5B,CAAnB,CAAd,CAAhB,CAEA,UAAYmU,cAAclZ,WAAS,EAAT,CAAaiP,IAAb,CAAmB,CAAE5J,KAAM,gBAAR,CAAnB,CAAd,GAAiE,CAAE7K,IAAK,IAAP,CAAaoN,OAAQ,IAArB,CAA7E,CACIpN,IAAMqF,MAAMrF,GADhB,CAEIoN,OAAS/H,MAAM+H,MAFnB,CAIA,MAAO,CACL7C,MAAOA,KADF,CAELpL,QAASA,OAFJ,CAGLsO,OAAQA,MAHH,CAILC,eAAgBA,cAJX,CAKLW,eAAgBA,cALX,CAMLP,IAAKA,GANA,CAOLQ,cAAeA,aAPV,CAQLtO,IAAKA,GARA,CASLoN,OAAQA,MATH,CAULmB,QAASA,OAVJ,CAWLqO,WAAYA,UAXP,CAYLE,UAAWA,SAZN,CAAP,CAcD,CApDiB,CAApB,CAuDA,oBAAuB,UAAY,CACjC,SAAW3X,kBAAkB7B,oBAAoBC,IAApB,CAAyB,gBAAA,CAAiBuF,KAAjB,CAAwB,CAC5E,kBAAoBA,MAAMwF,aAA1B,CACI1V,KAAOkQ,MAAMlQ,IADjB,CAEIrC,EAAIuS,MAAMvS,CAFd,CAGI+e,UAAYxM,MAAMwM,SAHtB,CAII7I,OAAS3D,MAAM2D,MAJnB,CAKImS,UAAY9V,MAAM8V,SALtB,CAMIrU,MAAQzB,MAAMyB,KANlB,CAOIvK,IAAM8I,MAAM9I,GAPhB,CAQIgV,WAAalM,MAAM8D,OARvB,CASA,SAAA,CAAW4M,YAAX,CAAyBqF,aAAzB,CAAwCC,cAAxC,CAAwDlC,UAAxD,CACA,2BAA2BnZ,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAAShE,IAAT,CAAgBgE,SAAS5M,IAAjC,EACE,MAAA;AAEEioB,MAAQ,CAAR,CACAvF,aAAe,CAACa,eAAara,GAAb,CAAD,CAAf;;AAKF,MAAA,CACE,GAAI,EAAEsO,eAAiByQ,MAAQ,EAA3B,CAAJ,CAAoC,CAClCrb,SAAS5M,IAAT,CAAgB
,EAAhB,CACA,MACD,CAEDioB,OAAS,CAAT,CACArb,SAAS5M,IAAT,CAAgB,CAAhB,CACA,gBAAgBwV,MAAT,CAAgBgC,aAAhB,CAAP,CAEF,MAAA,CACE/X,EAAImN,SAASsC,IAAb,CAEApN,KAAOrC,EAAEqC,IAAF,EAAP,CAEAimB,cAAgB,CACd7e,IAAKsO,aADS,CAEd1V,KAAMA,IAFQ,CAGdrC,EAAGA,CAHW,CAId+e,UAAWA,SAJG,CAKdiJ,YAAa,IALC,CAMdE,eAAgBlU,KANF,CAOdiP,aAAcA,YAPA,CAQd5M,QAASoI,UARK,CAAhB,CAUA8J,eAAiBE,cAAcjK,OAAd,CAAsB6J,SAAtB,CAAiCC,aAAjC,CAAjB,CAGArF,aAAaxP,IAAb,CAAkBsE,aAAlB,EACA7B,OAASjH,WAAS,EAAT,CAAaiH,MAAb,CAAqB,CAC5BtN,QAAS,aAAesN,OAAOtN,OAAtB,CAAgC,mCAAhC,CAAsE4f,KAAtE,CAA8E,iBAA9E,CAAkGD,eAAe3f,OAAjH,CAA2H,YADxG,CAArB,CAAT,CAIAmP,cAAgBwQ,eAAexQ,aAA/B,CACA5K,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MAEF,OAAA,CACE8lB,WAAaS,iBAAiBT,UAAjB,CAA4B,CAAEzd,QAAS,QAAUsN,OAAOtN,OAAjB,CAA2B,QAAtC,CAA5B,CAAb,CACA,gBAAgB+G,MAAT,CAAgB,QAAhB,CAA0BV,WAAS,EAAT,CAAaiH,MAAb,CAAqB,CACpDwS,YAAaF,KADuC,CAEpDG,eAAgBH,KAFoC,CAGpDnC,WAAYA,UAHwC,CAArB,CAA1B,CAAP,CAMF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgB/Y,IAAT,EAAP,CAxDJ,CA0DD,CACF,CA7DM,CA6DJwC,OA7DI,CA6DK,IA7DL,CAAP,CA8DD,CAzE4B,CAAlB,CAAX,CA2EA,wBAAA,CAAyB8Y,EAAzB,CAA6B,CAC3B,YAAY3Y,KAAL,CAAW,IAAX,CAAiBhD,SAAjB,CAAP,CACD,CAED,sBAAA,CACD,CAjFqB,EAAtB,CAmFA,YAAc,CACZ8B,MAAO,cAAA,CAAetF,GAAf,CAAoBpH,IAApB,CAA0B,CAC/B,UAAY,IAAZ,CAEA,SAAW4K,UAAUtL,MAAV,CAAmB,CAAnB,EAAwBsL,UAAU,CAAV,IAAiBpC,SAAzC,CAAqDoC,UAAU,CAAV,CAArD,CAAoE,EAA/E,CACA,yBAAyBF,oBAAoBC,IAApB,CAAyB,gBAAA,EAAmB,CACnE,uBAAA,CAAyB6b,aAAzB,CAAwChB,cAAxC,CAAwDC,QAAxD,CAAkEjZ,SAAlE,CAA6EwZ,SAA7E,CAAwFroB,CAAxF,CAA2F+e,SAA3F,CAAsG7I,MAAtG,CAA8G4S,OAA9G,CAAuH9U,KAAvH,CAA8H+D,aAA9H,CAEA,2BAA2B7K,IAApB,CAAyB,iBAAA,CAAkBC,QAAlB,CAA4B,CAC1D,MAAO,CAAP,CAAU,CACR,OAAQA,SAAShE,IAAT,CAAgBgE,SAAS5M,IAAjC,EACE,MAAA,CACEwoB,oBAAsB7K,KAAK2K,aAA3B,CAA0CA,cAAgBE,sBAAwBle,SAAxB,CAAoC,IAApC,CAA2Cke,mBAArG,CAA0HlB,eAAiB3J,KAAK4J,QAAhJ,CAA0JA,SAAWD,iBAAmBhd,SAAnB,CAA+B,IAA/B,CAAsCgd,cAA3M,CACAhZ,UAAYlF,MAAIoF,KAAJ,CAAUtF,GAAV,CAAZ,CAEA,GAAIuf,YAAYna,SAAZ,CAAJ,CAA4B,CAC1B1B,SAAS5M,IAAT,CAAgB,CAAhB,CACA,MACD,CAED,gBAAgBoP,MAAT,CAAgB,QAAhB,CAA0BE,OAAOnC,MAAjC,CAAP,CAEF,
MAAA,CACE2a,UAAYY,aAAaxf,GAAb,CAAkBoF,SAAlB,CAAZ;AAGA1B,SAAS5M,IAAT,CAAgB,CAAhB,CACA,gBAAgBwV,MAAT,CAAgBtM,GAAhB,CAAqBpH,IAArB,CAA2BwM,SAA3B,CAAP,CAEF,MAAA,CACE7O,EAAImN,SAASsC,IAAb,CAEA,GAAI,CAACzP,EAAE2N,KAAP,CAAc,CACZR,SAAS5M,IAAT,CAAgB,EAAhB,CACA,MACD,CAED,gBAAgBoP,MAAT,CAAgB,QAAhB,CAA0B3P,CAA1B,CAAP,CAEF,OAAA,CAEEqC,KAAOrC,EAAEqC,IAAF,EAAP;;AAIA0c,UAAY/e,EAAE,MAAF,EAAUvB,GAAV,CAAc,SAAU+K,CAAV,CAAa3I,IAAb,CAAmB,CAC3C,SAASA,IAAF,EAAQgC,IAAR,CAAa,MAAb,CAAP,CACD,CAFW,EAET0H,OAFS,EAAZ,CAGA2L,OAASuS,cAAcjK,OAAd,CAAsB6J,SAAtB,CAAiC,CACxC5e,IAAKA,GADmC,CAExCpH,KAAMA,IAFkC,CAGxCrC,EAAGA,CAHqC,CAIxC+e,UAAWA,SAJ6B,CAKxClQ,UAAWA,SAL6B,CAMxCiZ,SAAUA,QAN8B,CAOxCzR,QAASA,SAP+B,CAAjC,CAAT,CASAyS,QAAU5S,MAAV,CAAkBlC,MAAQ8U,QAAQ9U,KAAlC,CAAyC+D,cAAgB+Q,QAAQ/Q,aAAjE;AAIA,GAAI,EAAE8Q,eAAiB9Q,aAAnB,CAAJ,CAAuC,CACrC5K,SAAS5M,IAAT,CAAgB,EAAhB,CACA,MACD,CAED4M,SAAS5M,IAAT,CAAgB,EAAhB,CACA,uBAAuB,CACrB8nB,UAAWA,SADU,CAErBtQ,cAAeA,aAFM,CAGrB1V,KAAMA,IAHe,CAIrBrC,EAAGA,CAJkB,CAKrB+e,UAAWA,SALU,CAMrB7I,OAAQA,MANa,CAOrBlC,MAAOA,KAPc,CAQrBvK,IAAKA,GARgB,CASrB4M,QAASA,SATY,CAAhB,CAAP,CAYF,OAAA,CACEH,OAAS/I,SAASsC,IAAlB,CACAtC,SAAS5M,IAAT,CAAgB,EAAhB,CACA,MAEF,OAAA,CACE2V,OAASjH,WAAS,EAAT,CAAaiH,MAAb,CAAqB,CAC5BwS,YAAa,CADe,CAE5BQ,eAAgB,CAFY,CAArB,CAAT,CAKF,OAAA,CACE,gBAAgBvZ,MAAT,CAAgB,QAAhB,CAA0BuG,MAA1B,CAAP,CAEF,OAAA,CACA,IAAK,KAAL,CACE,gBAAgB5I,IAAT,EAAP,CArFJ,CAuFD,CACF,CA1FM,CA0FJwC,OA1FI,CA0FKqG,KA1FL,CAAP,CA2FD,CA9FwB,CAAlB,GAAP,CA+FD,CApGW;;AAyGZgT,cAAe,sBAAA,CAAuB1f,GAAvB,CAA4B,CACzC,WAAa,IAAb,CAEA,yBAAyBsD,oBAAoBC,IAApB,CAAyB,iBAAA,EAAoB,CACpE,2BAA2BE,IAApB,CAAyB,kBAAA,CAAmBkc,SAAnB,CAA8B,CAC5D,MAAO,CAAP,CAAU,CACR,OAAQA,UAAUjgB,IAAV,CAAiBigB,UAAU7oB,IAAnC,EACE,MAAA,CACE6oB,UAAU7oB,IAAV,CAAiB,CAAjB,CACA,gBAAgBwV,MAAT,CAAgBtM,GAAhB,CAAP,CAEF,MAAA,CACE,iBAAiBkG,MAAV,CAAiB,QAAjB,CAA2ByZ,UAAU3Z,IAArC,CAAP,CAEF,MAAA,CACA,IAAK,KAAL,CACE,iBAAiBnC,IAAV,EAAP,CAVJ,CAYD,CACF,CAfM,CAeJ+b,QAfI,CAeMC,MAfN,CAAP,CAgBD,CAjBwB,CAAlB,GAAP,CAkBD,CA9HW,CAAd,CAiIA,YAAiBC,OAAjB;;ACznJe,SAASC,YAAT,CAAsBC,
OAAtB,EAA0C;oCAARjV,MAAQ;UAAA;;;MACnDA,OAAO7S,MAAX,EAAmB;WACV8nB,QAAQhmB,MAAR,CAAe,UAACyS,MAAD,EAASwT,IAAT,EAAeC,GAAf,EAAuB;UACvC1e,QAAQuJ,OAAOmV,GAAP,CAAZ;;UAEI1e,SAAS,OAAOA,MAAM2e,QAAb,KAA0B,UAAvC,EAAmD;gBACzC3e,MAAM2e,QAAN,EAAR;OADF,MAEO;gBACG,EAAR;;;aAGK1T,SAASwT,IAAT,GAAgBze,KAAvB;KATK,EAUJ,EAVI,CAAP;;;SAaKwe,QAAQ7qB,IAAR,CAAa,EAAb,CAAP;;;ACbF,IAAMirB,cAAc,sBAApB;AACA,IAAMC,qBAAqB,MAA3B;;AAEA,AAAe,SAASC,QAAT,CAAkBN,OAAlB,EAAsC;oCAARjV,MAAQ;UAAA;;;MAC7CwV,WAAWR,+BAAaC,OAAb,SAAyBjV,MAAzB,EAAjB;;aACawV,SAAS5kB,KAAT,CAAeykB,WAAf,KAA+B,EAFO;;MAE9C1b,IAF8C;;MAG/C8b,cAAc,gBAAlB;;MAEI,CAAC9b,IAAL,EAAW;WACF6b,QAAP;kBACc,gBAAd;;;SAGK7b,KAAK4E,KAAL,CAAW,IAAX,EACJhN,KADI,CACE,CADF,EAEJtH,GAFI,CAEA,UAACyrB,IAAD,EAAU;WACNA,KAAKnjB,OAAL,CAAakjB,WAAb,EAA0B,IAA1B,CAAP;;QAEIH,mBAAmB3oB,IAAnB,CAAwB+oB,IAAxB,CAAJ,EAAmC;aAC1BA,KAAKnjB,OAAL,CAAa+iB,kBAAb,EAAiC,EAAjC,CAAP;;;WAGKI,IAAP;GATG,EAWJtrB,IAXI,CAWC,IAXD,CAAP;;;;;ACfF,AAEA,wBAAe,UAAU6O,QAAV,EAAoBzC,IAApB,EAA0B;SAChC+e,QAAP,kBACiB/e,IADjB,EAEeyC,QAFf;;;;;;ACHF,AAEA,IAAM0c,SAAS,CACb,KADa,EAEb,QAFa,EAGb,SAHa,EAIb,YAJa,EAKb,eALa,EAMb,SANa,EAOb,WAPa,EAQb,aARa,EASb,gBATa,CAAf;;AAYA,SAASC,OAAT,CAAiBhoB,GAAjB,EAAsB6I,KAAtB,EAA6Bof,GAA7B,EAAkCC,IAAlC,EAAwC7gB,GAAxC,EAA6C;MACvC0gB,OAAO3mB,IAAP,CAAY;WAAKqb,MAAMzc,GAAX;GAAZ,CAAJ,EAAiC,OAAO,EAAP;;SAE1B2nB,QAAP,oBACkB3nB,GADlB,EAE+CA,GAF/C,EAGkBioB,GAHlB,EAK+BC,IAL/B,EAOe7gB,GAPf,EASoBrH,GATpB,EAcyBA,GAdzB,EAciC6I,cAAaA,KAAb,SAAyB,IAd1D;;;AAmBF,4BAAe,UAAUqf,IAAV,EAAgB7gB,GAAhB,EAAqB4gB,GAArB,EAA0BnU,MAA1B,EAAkClL,IAAlC,EAAwC;SAC9C+e,QAAP,mBAUc/e,IAVd,EAgBWvB,GAhBX,EAqBQ,iBAAgByM,MAAhB,EAAwBzX,GAAxB,CAA4B;WAAK2rB,QAAQvL,CAAR,EAAW3I,OAAO2I,CAAP,CAAX,EAAsBwL,GAAtB,EAA2BC,IAA3B,EAAiC7gB,GAAjC,CAAL;GAA5B,EAAwE7K,IAAxE,CAA6E,MAA7E,CArBR,EAyBcyrB,GAzBd,EA6B2BC,IA7B3B,EA+BW7gB,GA/BX;;;ACrCF;;;AAGA,AACA,AACA,AACA,AACA,AAMA,AACA,AACA,AAEA,IAAM8gB,YAAY,CAChB;QACQ,OADR;QAEQ,SAFR;WAGW,yEAHX;UAAA,oBAIWtf,KAJX,EAIkB;qBACOtB,IAAIoF,KAAJ,CAAU9D,KAAV,CADP;QACNwC,QADM,cACNA,QADM;;QAEVA,QAAJ,EAAc,OAAO,IAAP;;W
AEP,KAAP;;CATY,CAAlB;AAaA,IAAI+c,gBAAJ;;AAEA,SAASC,OAAT,CAAiBC,EAAjB,EAAqBC,IAArB,EAA2BC,GAA3B,EAAgCC,SAAhC,EAA2C;YAC/BC,IAAI,EAAEvoB,MAAMqoB,GAAR,EAAJ,CAAV;UACQxd,KAAR;MACM8I,SAASwU,uCAAMC,IAAN,EAAf;;MAEIzU,UAAUA,OAAO6U,IAArB,EAA2B;WAClBA,IAAP,CAAY;aAAKC,SAASC,CAAT,EAAYN,IAAZ,EAAkBE,SAAlB,CAAL;KAAZ;GADF,MAEO;YACGK,OAAR;;;SAGKhV,MAAP;;;AAGF,SAASiV,gBAAT,CAA0Bd,GAA1B,EAA+BO,GAA/B,EAAoC;MAC9B,CAACQ,GAAGC,UAAH,CAAchB,GAAd,CAAL,EAAyB;YACfe,GAAGE,SAAX,EAAsB,CAACjB,GAAD,CAAtB,EAA6BO,GAA7B;;;;AAIJ,SAASW,MAAT,CAAgB9hB,GAAhB,EAAqB;oBACEE,IAAIoF,KAAJ,CAAUtF,GAAV,CADF;MACXgE,QADW,eACXA,QADW;;sCAEeA,QAAlC;;;AAGF,SAAS+d,oBAAT,CAA8B/hB,GAA9B,EAAmC;MAC3B4gB,MAAMkB,OAAO9hB,GAAP,CAAZ;;oBACqBE,IAAIoF,KAAJ,CAAUtF,GAAV,CAFY;MAEzBgE,QAFyB,eAEzBA,QAFyB;;MAG7Bod,YAAY,KAAhB;;MAEI,CAACO,GAAGC,UAAH,CAAchB,GAAd,CAAL,EAAyB;gBACX,IAAZ;qBACiBA,GAAjB,gBAAkC5c,QAAlC;qCAC+BA,QAA/B,EAA2C,6BAA3C;;;UAGM8b,QAAQJ,aAAhB,EAA+B,CAAC1f,GAAD,CAA/B,EAAsC,kBAAtC,EAA0DohB,SAA1D;;;AAGFY,SAASC,MAAT,CAAgBnB,SAAhB,EAA2BQ,IAA3B,CAAgC,UAACY,OAAD,EAAa;uBACtBA,QAAQC,OAA7B;CADF;;AAIA,SAASC,gBAAT,CAA0BpiB,GAA1B,EAA+B6gB,IAA/B,EAAqCpU,MAArC,EAA6C;oBACtBvM,IAAIoF,KAAJ,CAAUtF,GAAV,CADsB;MACnCgE,QADmC,eACnCA,QADmC;;MAErCkJ,YAAYmV,kBAAkBre,QAAlB,EAA4Bse,cAActe,QAAd,CAA5B,CAAlB;MACMue,gBACJC,sBACE3B,IADF,EACQ7gB,GADR,EACa8hB,OAAO9hB,GAAP,CADb,EAC0ByM,MAD1B,EACkC6V,cAActe,QAAd,CADlC,CADF;;KAKGye,aAAH,CAAoBX,OAAO9hB,GAAP,CAApB,gBAA4CkN,SAA5C;KACGuV,aAAH,CAAoBX,OAAO9hB,GAAP,CAApB,qBAAiDuiB,aAAjD;KACGG,cAAH,CACE,kCADF,EAEEC,aAAa3iB,GAAb,CAFF;oDAIkC8hB,OAAO9hB,GAAP,CAAlC;;;AAGF,SAASuhB,QAAT,CAAkBhrB,CAAlB,QAA4B6qB,SAA5B,EAAuC;;MAAjBphB,GAAiB;;oBAChBE,IAAIoF,KAAJ,CAAUtF,GAAV,CADgB;MAC7BgE,QAD6B,eAC7BA,QAD6B;;UAG7Byd,OAAR;;MAEMmB,WAAW,IAAIC,IAAJ,GAAWC,OAAX,EAAjB;MACMjC,uBAAqB7c,QAArB,SAAiC4e,QAAjC,UAAN;;uBAEkBrsB,EAAE,GAAF,EAAOwI,KAAP,EAAlB,EAAkCxI,CAAlC,EAAqCyJ,GAArC;IACE,eAAF,EAAmBvJ,IAAnB,CAAwB,UAACC,KAAD,EAAQU,IAAR,EAAiB;QACjCE,QAAQf,EAAEa,IAAF,CAAd;QACMgjB,OAAO9iB,MAAM8B,IAAN,CAAW,KAAX,CAAb;QACIghB,QAAQA,KAAK9d,KAAL,CAAW,CAAX,EA
Ac,CAAd,MAAqB,IAAjC,EAAuC;YAC/BlD,IAAN,CAAW,KAAX,YAA0BghB,IAA1B;;GAJJ;MAOMxhB,OAAOY,cAAcjD,EAAE,GAAF,EAAOwI,KAAP,EAAd,EAA8BxI,CAA9B,EAAiC,CAAC,QAAD,CAAjC,EAA6CqC,IAA7C,EAAb;;KAEG6pB,aAAH,CAAiB5B,IAAjB,EAAuBjoB,IAAvB;;UAEQ0M,KAAR,CAActF,GAAd,EAAmBpH,IAAnB,EAAyB0oB,IAAzB,CAA8B,UAAC7U,MAAD,EAAY;QACpC2U,SAAJ,EAAe;cACLgB,gBAAR,EAA0B,CAACpiB,GAAD,EAAM6gB,IAAN,EAAYpU,MAAZ,CAA1B,EAA+C,6BAA/C;cACQsW,GAAR,4GACqB/e,QADrB,wDAGwBA,QAHxB;KAFF,MAMO;cACG+e,GAAR,mHAEuClC,IAFvC,iHAI4BA,IAJ5B;;GARJ;;;AAiBF,SAAS8B,YAAT,CAAsB3iB,GAAtB,EAA2B;oBACJE,IAAIoF,KAAJ,CAAUtF,GAAV,CADI;MACjBgE,QADiB,eACjBA,QADiB;;gCAEEA,QAA3B;;;AAGF,SAASse,aAAT,CAAuBte,QAAvB,EAAiC;MACzBzC,OAAOyC,SACVsF,KADU,CACJ,GADI,EAEVtU,GAFU,CAEN;gBAAQguB,EAAEC,MAAF,CAAS,CAAT,EAAYC,WAAZ,EAAR,GAAoCF,EAAE1mB,KAAF,CAAQ,CAAR,CAApC;GAFM,EAGVnH,IAHU,CAGL,EAHK,CAAb;SAIUoM,IAAV;"}
\ No newline at end of file
diff --git a/dist/iris.js b/dist/iris.js
deleted file mode 100644
index 75e6046a..00000000
--- a/dist/iris.js
+++ /dev/null
@@ -1,3807 +0,0 @@
-'use strict';
-
-function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }
-
-var URL = _interopDefault(require('url'));
-var babelPolyfill = require('babel-polyfill');
-var cheerio = _interopDefault(require('cheerio'));
-var request = _interopDefault(require('request'));
-var stringDirection = _interopDefault(require('string-direction'));
-var validUrl = _interopDefault(require('valid-url'));
-var moment = _interopDefault(require('moment'));
-var wuzzy = _interopDefault(require('wuzzy'));
-var difflib = _interopDefault(require('difflib'));
-var ellipsize = _interopDefault(require('ellipsize'));
-
-var _marked = [range].map(regeneratorRuntime.mark);
-
-function range() {
- var start = arguments.length <= 0 || arguments[0] === undefined ? 1 : arguments[0];
- var end = arguments.length <= 1 || arguments[1] === undefined ? 1 : arguments[1];
- return regeneratorRuntime.wrap(function range$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- if (!(start <= end)) {
- _context.next = 5;
- break;
- }
-
- _context.next = 3;
- return start += 1;
-
- case 3:
- _context.next = 0;
- break;
-
- case 5:
- case "end":
- return _context.stop();
- }
- }
- }, _marked[0], this);
-}
-
-// extremely simple url validation as a first step
-function validateUrl(_ref) {
- var hostname = _ref.hostname;
-
- // If this isn't a valid url, return an error message
- return !!hostname;
-}
-
-var Errors = {
- badUrl: {
- error: true,
- messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.'
- }
-};
-
-var REQUEST_HEADERS = {
- 'User-Agent': 'Readability - http://readability.com/about/'
-};
-
-// The number of milliseconds to attempt to fetch a resource before timing out.
-var FETCH_TIMEOUT = 10000;
-
-// Content types that we do not extract content from
-var BAD_CONTENT_TYPES = ['audio/mpeg', 'image/gif', 'image/jpeg', 'image/jpg'];
-
-var BAD_CONTENT_TYPES_RE = new RegExp('^(' + BAD_CONTENT_TYPES.join('|') + ')$', 'i');
-
-// Use this setting as the maximum size an article can be
-// for us to attempt parsing. Defaults to 5 MB.
-var MAX_CONTENT_LENGTH = 5242880;
-
-var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) {
- return typeof obj;
-} : function (obj) {
- return obj && typeof Symbol === "function" && obj.constructor === Symbol ? "symbol" : typeof obj;
-};
-
-var asyncToGenerator = function (fn) {
- return function () {
- var gen = fn.apply(this, arguments);
- return new Promise(function (resolve, reject) {
- function step(key, arg) {
- try {
- var info = gen[key](arg);
- var value = info.value;
- } catch (error) {
- reject(error);
- return;
- }
-
- if (info.done) {
- resolve(value);
- } else {
- return Promise.resolve(value).then(function (value) {
- return step("next", value);
- }, function (err) {
- return step("throw", err);
- });
- }
- }
-
- return step("next");
- });
- };
-};
-
-var defineProperty = function (obj, key, value) {
- if (key in obj) {
- Object.defineProperty(obj, key, {
- value: value,
- enumerable: true,
- configurable: true,
- writable: true
- });
- } else {
- obj[key] = value;
- }
-
- return obj;
-};
-
-var _extends = Object.assign || function (target) {
- for (var i = 1; i < arguments.length; i++) {
- var source = arguments[i];
-
- for (var key in source) {
- if (Object.prototype.hasOwnProperty.call(source, key)) {
- target[key] = source[key];
- }
- }
- }
-
- return target;
-};
-
-var slicedToArray = function () {
- function sliceIterator(arr, i) {
- var _arr = [];
- var _n = true;
- var _d = false;
- var _e = undefined;
-
- try {
- for (var _i = arr[Symbol.iterator](), _s; !(_n = (_s = _i.next()).done); _n = true) {
- _arr.push(_s.value);
-
- if (i && _arr.length === i) break;
- }
- } catch (err) {
- _d = true;
- _e = err;
- } finally {
- try {
- if (!_n && _i["return"]) _i["return"]();
- } finally {
- if (_d) throw _e;
- }
- }
-
- return _arr;
- }
-
- return function (arr, i) {
- if (Array.isArray(arr)) {
- return arr;
- } else if (Symbol.iterator in Object(arr)) {
- return sliceIterator(arr, i);
- } else {
- throw new TypeError("Invalid attempt to destructure non-iterable instance");
- }
- };
-}();
-
-function get(options) {
- return new Promise(function (resolve, reject) {
- request(options, function (err, response, body) {
- if (err) {
- reject(err);
- } else {
- resolve({ body: body, response: response });
- }
- });
- });
-}
-
-// Evaluate a response to ensure it's something we should be keeping.
-// This does not validate in the sense of a response being 200 level or
-// not. Validation here means that we haven't found reason to bail from
-// further processing of this url.
-
-function validateResponse(response) {
- var parseNon2xx = arguments.length <= 1 || arguments[1] === undefined ? false : arguments[1];
-
- // Check if we got a valid status code
- if (response.statusMessage !== 'OK') {
- if (!response.statusCode) {
- throw new Error('Unable to fetch content. Original exception was ' + response.error);
- } else if (!parseNon2xx) {
- throw new Error('Resource returned a response status code of ' + response.statusCode + ' and resource was instructed to reject non-2xx level status codes.');
- }
- }
-
- var _response$headers = response.headers;
- var contentType = _response$headers['content-type'];
- var contentLength = _response$headers['content-length'];
-
- // Check that the content is not in BAD_CONTENT_TYPES
-
- if (BAD_CONTENT_TYPES_RE.test(contentType)) {
- throw new Error('Content-type for this resource was ' + contentType + ' and is not allowed.');
- }
-
- // Check that the content length is below maximum
- if (contentLength > MAX_CONTENT_LENGTH) {
- throw new Error('Content for this resource was too large. Maximum content length is ' + MAX_CONTENT_LENGTH + '.');
- }
-
- return true;
-}
-
-// Set our response attribute to the result of fetching our URL.
-// TODO: This should gracefully handle timeouts and raise the
-// proper exceptions on the many failure cases of HTTP.
-// TODO: Ensure we are not fetching something enormous. Always return
-// unicode content for HTML, with charset conversion.
-
-var fetchResource = (function () {
- var _ref2 = asyncToGenerator(regeneratorRuntime.mark(function _callee(url, parsedUrl) {
- var options, _ref3, response, body;
-
- return regeneratorRuntime.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- parsedUrl = parsedUrl || URL.parse(encodeURI(url));
-
- options = {
- url: parsedUrl,
- headers: _extends({}, REQUEST_HEADERS),
- timeout: FETCH_TIMEOUT,
- // Don't set encoding; fixes issues
- // w/gzipped responses
- encoding: null,
- // Accept cookies
- jar: true,
- // Accept and decode gzip
- gzip: true,
- // Follow any redirect
- followAllRedirects: true
- };
- _context.next = 4;
- return get(options);
-
- case 4:
- _ref3 = _context.sent;
- response = _ref3.response;
- body = _ref3.body;
- _context.prev = 7;
-
- validateResponse(response);
- return _context.abrupt('return', { body: body, response: response });
-
- case 12:
- _context.prev = 12;
- _context.t0 = _context['catch'](7);
- return _context.abrupt('return', Errors.badUrl);
-
- case 15:
- case 'end':
- return _context.stop();
- }
- }
- }, _callee, this, [[7, 12]]);
- }));
-
- function fetchResource(_x2, _x3) {
- return _ref2.apply(this, arguments);
- }
-
- return fetchResource;
-})();
-
-function convertMetaProp($, from, to) {
- $('meta[' + from + ']').each(function (_, node) {
- var $node = $(node);
-
- var value = $node.attr(from);
- $node.attr(to, value);
- $node.removeAttr(from);
- });
-
- return $;
-}
-
-// For ease of use in extracting from meta tags,
-// replace the "content" attribute on meta tags with the
-// "value" attribute.
-//
-// In addition, normalize 'property' attributes to 'name' for ease of
-// querying later. See, e.g., og or twitter meta tags.
-
-function normalizeMetaTags($) {
- $ = convertMetaProp($, 'content', 'value');
- $ = convertMetaProp($, 'property', 'name');
- return $;
-}
-
-var IS_LINK = new RegExp('https?://', 'i');
-var IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');
-
-var TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');
-
-// Convert all instances of images with potentially
-// lazy loaded images into normal images.
-// Many sites will have img tags with no source, or an image tag with a src
-// attribute that a is a placeholer. We need to be able to properly fill in
-// the src attribute so the images are no longer lazy loaded.
-function convertLazyLoadedImages($) {
- $('img').each(function (_, img) {
- Reflect.ownKeys(img.attribs).forEach(function (attr) {
- var value = img.attribs[attr];
-
- if (attr !== 'src' && IS_LINK.test(value) && IS_IMAGE.test(value)) {
- $(img).attr('src', value);
- }
- });
- });
-
- return $;
-}
-
-function isComment(index, node) {
- return node.type === 'comment';
-}
-
-function cleanComments($) {
- $.root().find('*').contents().filter(isComment).remove();
-
- return $;
-}
-
-function clean($) {
- $(TAGS_TO_REMOVE).remove();
-
- $ = cleanComments($);
- return $;
-}
-
-var Resource = {
-
- // Create a Resource.
- //
- // :param url: The URL for the document we should retrieve.
- // :param response: If set, use as the response rather than
- // attempting to fetch it ourselves. Expects a
- // string.
- create: function create(url, preparedResponse, parsedUrl) {
- var _this = this;
-
- return asyncToGenerator(regeneratorRuntime.mark(function _callee() {
- var result, validResponse;
- return regeneratorRuntime.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- result = void 0;
-
- if (!preparedResponse) {
- _context.next = 6;
- break;
- }
-
- validResponse = {
- statusMessage: 'OK',
- statusCode: 200,
- headers: {
- 'content-type': 'text/html',
- 'content-length': 500
- }
- };
-
-
- result = { body: preparedResponse, response: validResponse };
- _context.next = 9;
- break;
-
- case 6:
- _context.next = 8;
- return fetchResource(url, parsedUrl);
-
- case 8:
- result = _context.sent;
-
- case 9:
- if (!result.error) {
- _context.next = 11;
- break;
- }
-
- return _context.abrupt('return', result);
-
- case 11:
- return _context.abrupt('return', _this.generateDoc(result));
-
- case 12:
- case 'end':
- return _context.stop();
- }
- }
- }, _callee, _this);
- }))();
- },
- generateDoc: function generateDoc(_ref) {
- var content = _ref.body;
- var response = _ref.response;
- var contentType = response.headers['content-type'];
-
- // TODO: Implement is_text function from
- // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
-
- if (!contentType.includes('html') && !contentType.includes('text')) {
- throw new Error('Content does not appear to be text.');
- }
-
- var $ = cheerio.load(content, { normalizeWhitespace: true });
-
- if ($.root().children().length === 0) {
- throw new Error('No children, likely a bad parse.');
- }
-
- $ = normalizeMetaTags($);
- $ = convertLazyLoadedImages($);
- $ = clean($);
-
- return $;
- }
-};
-
-var NYMagExtractor = {
- domain: 'nymag.com',
- content: {
- // Order by most likely. Extractor will stop on first occurence
- selectors: ['div.article-content', 'section.body', 'article.article'],
-
- // Selectors to remove from the extracted content
- clean: ['.ad', '.single-related-story'],
-
- // Object of tranformations to make on matched elements
- // Each key is the selector, each value is the tag to
- // transform to.
- // If a function is given, it should return a string
- // to convert to or nothing (in which case it will not perform
- // the transformation.
- transforms: {
- // Convert h1s to h2s
- h1: 'h2',
-
- // Convert lazy-loaded noscript images to figures
- noscript: function noscript($node) {
- var $children = $node.children();
- if ($children.length === 1 && $children.get(0).tagName === 'img') {
- return 'figure';
- }
-
- return null;
- }
- }
- },
-
- title: {
- selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']
- },
-
- author: {
- selectors: ['.by-authors', '.lede-feature-author']
- },
-
- dek: {
- selectors: ['.lede-feature-teaser']
- },
-
- date_published: {
- selectors: ['time.article-timestamp[datetime]', 'time.article-timestamp']
- }
-};
-
-var BloggerExtractor = {
- domain: 'blogspot.com',
- content: {
- // Blogger is insane and does not load its content
- // initially in the page, but it's all there
- // in noscript
- selectors: ['.post-content noscript'],
-
- // Selectors to remove from the extracted content
- clean: [],
-
- // Convert the noscript tag to a div
- transforms: {
- noscript: 'div'
- }
- },
-
- author: {
- selectors: ['.post-author-name']
- },
-
- title: {
- selectors: ['h2.title']
- },
-
- date_published: {
- selectors: ['span.publishdate']
- }
-};
-
-var WikipediaExtractor = {
- domain: 'wikipedia.org',
- content: {
- selectors: ['#mw-content-text'],
-
- defaultCleaner: false,
-
- // transform top infobox to an image with caption
- transforms: {
- '.infobox img': function infoboxImg($node) {
- var $parent = $node.parents('.infobox');
- // Only prepend the first image in .infobox
- if ($parent.children('img').length === 0) {
- $parent.prepend($node);
- }
- },
- '.infobox caption': 'figcaption',
- '.infobox': 'figure'
- },
-
- // Selectors to remove from the extracted content
- clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc']
-
- },
-
- author: 'Wikipedia Contributors',
-
- title: {
- selectors: ['h2.title']
- },
-
- date_published: {
- selectors: ['#footer-info-lastmod']
- }
-
-};
-
-var TwitterExtractor = {
- domain: 'twitter.com',
-
- content: {
- transforms: {
- // We're transforming essentially the whole page here.
- // Twitter doesn't have nice selectors, so our initial
- // selector grabs the whole page, then we're re-writing
- // it to fit our needs before we clean it up.
- '.permalink[role=main]': function permalinkRoleMain($node, $) {
- var tweets = $node.find('.tweet');
- var $tweetContainer = $('');
- $tweetContainer.append(tweets);
- $node.replaceWith($tweetContainer);
- },
-
- // Twitter wraps @ with s, which
- // renders as a strikethrough
- s: 'span'
- },
-
- selectors: ['.permalink[role=main]'],
-
- defaultCleaner: false,
-
- clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']
- },
-
- author: {
- selectors: ['.tweet.permalink-tweet .username']
- },
-
- date_published: {
- selectors: ['.permalink-tweet ._timestamp[data-time-ms]']
- }
-
-};
-
-var Extractors = {
- 'nymag.com': NYMagExtractor,
- 'blogspot.com': BloggerExtractor,
- 'wikipedia.org': WikipediaExtractor,
- 'twitter.com': TwitterExtractor
-};
-
-// Spacer images to be removed
-var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
-
-// A list of tags to strip from the output if we encounter them.
-var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
-
-// cleanAttributes
-var REMOVE_ATTRS = ['style', 'align'];
-var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
- return '[' + selector + ']';
-});
-var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
-var WHITELIST_ATTRS = ['src', 'href', 'class', 'id', 'score'];
-var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
-
-// removeEmpty
-var REMOVE_EMPTY_TAGS = ['p'];
-var REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {
- return tag + ':empty';
-}).join(',');
-
-// cleanTags
-var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
-
-// cleanHeaders
-var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
-var HEADER_TAG_LIST = HEADER_TAGS.join(',');
-
-// // CONTENT FETCHING CONSTANTS ////
-
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
-// 'form', // This is too generic, has too many false positives
-'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
-
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
-//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
-
-// A list of tags which, if found inside, should cause a to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into tags.
-var DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
-
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
-var NON_TOP_CANDIDATE_TAGS = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
-
-var NON_TOP_CANDIDATE_TAGS_RE = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS.join('|') + ')$', 'i');
-
-var PHOTO_HINTS = ['figure', 'photo', 'image', 'caption'];
-var PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');
-
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
-'\\Bcopy'];
-
-// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');
-
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
-'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
-'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
-'promo', 'pr_', // autoblog - press release
-'related', 'respond', 'roundcontent', // lifehacker restricted content warning
-'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
-// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');
-
-// XPath to try to determine if a page is wordpress. Not always successful.
-var IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';
-
-// A list of words that, if found in link text or URLs, likely mean that
-// this link is not a next page link.
-var EXTRANEOUS_LINK_HINTS = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];
-var EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');
-
-// Match any phrase that looks like it could be page, or paging, or pagination
-var PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');
-
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
-var BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
-var BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');
-
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
-var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
-
-var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
-var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
-
-function stripUnlikelyCandidates($) {
- // Loop through the provided document and remove any non-link nodes
- // that are unlikely candidates for article content.
- //
- // Links are ignored because there are very often links to content
- // that are identified as non-body-content, but may be inside
- // article-like content.
- //
- // :param $: a cheerio object to strip nodes from
- // :return $: the cleaned cheerio object
- $('*').not('a').each(function (index, node) {
- var $node = $(node);
- var classes = $node.attr('class');
- var id = $node.attr('id');
- if (!id && !classes) return;
-
- var classAndId = (classes || '') + ' ' + (id || '');
- if (CANDIDATES_WHITELIST.test(classAndId)) {
- return;
- } else if (CANDIDATES_BLACKLIST.test(classAndId)) {
- $node.remove();
- }
- });
-
- return $;
-}
-
-// ## NOTES:
-// Another good candidate for refactoring/optimizing.
-// Very imperative code, I don't love it. - AP
-
-
-// Given cheerio object, convert consecutive tags into
-// tags instead.
-//
-// :param $: A cheerio object
-
-function brsToPs($) {
- var collapsing = false;
- $('br').each(function (index, element) {
- var nextElement = $(element).next().get(0);
-
- if (nextElement && nextElement.tagName === 'br') {
- collapsing = true;
- $(element).remove();
- } else if (collapsing) {
- collapsing = false;
- // $(element).replaceWith('')
- paragraphize(element, $, true);
- }
- });
-
- return $;
-}
-
-// Given a node, turn it into a P if it is not already a P, and
-// make sure it conforms to the constraints of a P tag (I.E. does
-// not contain any other block tags.)
-//
-// If the node is a , it treats the following inline siblings
-// as if they were its children.
-//
-// :param node: The node to paragraphize; this is a raw node
-// :param $: The cheerio object to handle dom manipulation
-// :param br: Whether or not the passed node is a br
-
-function paragraphize(node, $) {
- var br = arguments.length <= 2 || arguments[2] === undefined ? false : arguments[2];
-
- var $node = $(node);
-
- if (br) {
- var sibling = node.nextSibling;
- var p = $('');
-
- // while the next node is text or not a block level element
- // append it to a new p node
- while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {
- var nextSibling = sibling.nextSibling;
- $(sibling).appendTo(p);
- sibling = nextSibling;
- }
-
- $node.replaceWith(p);
- $node.remove();
- return $;
- }
-
- return $;
-}
-
-function convertDivs($) {
- $('div').each(function (index, div) {
- var $div = $(div);
- var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
-
- if (convertable) {
- convertNodeTo($div, $, 'p');
- }
- });
-
- return $;
-}
-
-function convertSpans($) {
- $('span').each(function (index, span) {
- var $span = $(span);
- var convertable = $span.parents('p, div').length === 0;
- if (convertable) {
- convertNodeTo($span, $, 'p');
- }
- });
-
- return $;
-}
-
-// Loop through the provided doc, and convert any p-like elements to
-// actual paragraph tags.
-//
-// Things fitting this criteria:
-// * Multiple consecutive tags.
-// * tags without block level elements inside of them
-// * tags who are not children of or tags.
-//
-// :param $: A cheerio object to search
-// :return cheerio object with new p elements
-// (By-reference mutation, though. Returned just for convenience.)
-
-function convertToParagraphs($) {
- $ = brsToPs($);
- $ = convertDivs($);
- $ = convertSpans($);
-
- return $;
-}
-
-function convertNodeTo($node, $) {
- var tag = arguments.length <= 2 || arguments[2] === undefined ? 'p' : arguments[2];
-
- var node = $node.get(0);
- if (!node) {
- return $;
- }
-
- var _$node$get = $node.get(0);
-
- var attribs = _$node$get.attribs;
-
- var attribString = Reflect.ownKeys(attribs).map(function (key) {
- return key + '=' + attribs[key];
- }).join(' ');
-
- $node.replaceWith('<' + tag + ' ' + attribString + '>' + $node.contents() + '' + tag + '>');
- return $;
-}
-
-function cleanForHeight($img, $) {
- var height = parseInt($img.attr('height'), 10);
- var width = parseInt($img.attr('width'), 10) || 20;
-
- // Remove images that explicitly have very small heights or
- // widths, because they are most likely shims or icons,
- // which aren't very useful for reading.
- if ((height || 20) < 10 || width < 10) {
- $img.remove();
- } else if (height) {
- // Don't ever specify a height on images, so that we can
- // scale with respect to width without screwing up the
- // aspect ratio.
- $img.removeAttr('height');
- }
-
- return $;
-}
-
-// Cleans out images where the source string matches transparent/spacer/etc
-// TODO This seems very aggressive - AP
-function removeSpacers($img, $) {
- if (SPACER_RE.test($img.attr('src'))) {
- $img.remove();
- }
-
- return $;
-}
-
-function cleanImages($article, $) {
- $article.find('img').each(function (index, img) {
- var $img = $(img);
-
- cleanForHeight($img, $);
- removeSpacers($img, $);
- });
-
- return $;
-}
-
-function stripJunkTags(article, $) {
- $(STRIP_OUTPUT_TAGS.join(','), article).remove();
-
- return $;
-}
-
-// H1 tags are typically the article title, which should be extracted
-// by the title extractor instead. If there's less than 3 of them (<3),
-// strip them. Otherwise, turn 'em into H2s.
-
-function cleanHOnes(article, $) {
- var $hOnes = $('h1', article);
-
- if ($hOnes.length < 3) {
- $hOnes.each(function (index, node) {
- return $(node).remove();
- });
- } else {
- $hOnes.each(function (index, node) {
- convertNodeTo($(node), $, 'h2');
- });
- }
-
- return $;
-}
-
-function removeAllButWhitelist($article) {
- // $('*', article).each((index, node) => {
- $article.find('*').each(function (index, node) {
- node.attribs = Reflect.ownKeys(node.attribs).reduce(function (acc, attr) {
- if (WHITELIST_ATTRS_RE.test(attr)) {
- return _extends({}, acc, defineProperty({}, attr, node.attribs[attr]));
- }
-
- return acc;
- }, {});
- });
-}
-
-// function removeAttrs(article, $) {
-// REMOVE_ATTRS.forEach((attr) => {
-// $(`[${attr}]`, article).removeAttr(attr);
-// });
-// }
-
-// Remove attributes like style or align
-function cleanAttributes($article) {
- removeAllButWhitelist($article);
-
- return $article;
-}
-
-function removeEmpty($article, $) {
- $article.find('p').each(function (index, p) {
- var $p = $(p);
- if ($p.text().trim() === '') $p.remove();
- });
-
- return $;
-}
-
-// // CONTENT FETCHING CONSTANTS ////
-
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
-
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
-//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
-
-// A list of tags which, if found inside, should cause a to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into tags.
-var DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
-
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
-var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
-
-var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');
-
-// A list of selectors that specify, very clearly, either hNews or other
-// very content-specific style content, like Blogger templates.
-// More examples here: http://microformats.org/wiki/blog-post-formats
-var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
-
-var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
-var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');
-
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
-'\\Bcopy'];
-
-// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');
-
-// Readability publisher-specific guidelines
-var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');
-
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
-'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
-'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
-'promo', 'pr_', // autoblog - press release
-'related', 'respond', 'roundcontent', // lifehacker restricted content warning
-'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
-// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');
-
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
-var BLOCK_LEVEL_TAGS$1 = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
-var BLOCK_LEVEL_TAGS_RE$1 = new RegExp('^(' + BLOCK_LEVEL_TAGS$1.join('|') + ')$', 'i');
-
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
-var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
-var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
-var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
-var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
-
-// Get the score of a node based on its className and id.
-function getWeight(node) {
- var classes = node.attr('class');
- var id = node.attr('id');
- var score = 0;
-
- if (id) {
- // if id exists, try to score on both positive and negative
- if (POSITIVE_SCORE_RE$1.test(id)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$1.test(id)) {
- score -= 25;
- }
- }
-
- if (classes) {
- if (score === 0) {
- // if classes exist and id did not contribute to score
- // try to score on both positive and negative
- if (POSITIVE_SCORE_RE$1.test(classes)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$1.test(classes)) {
- score -= 25;
- }
- }
-
- // even if score has been set by id, add score for
- // possible photo matches
- // "try to keep photos if we can"
- if (PHOTO_HINTS_RE$1.test(classes)) {
- score += 10;
- }
-
- // add 25 if class matches entry-content-asset,
- // a class apparently instructed for use in the
- // Readability publisher guidelines
- // https://www.readability.com/developers/guidelines
- if (READABILITY_ASSET$1.test(classes)) {
- score += 25;
- }
- }
-
- return score;
-}
-
-// returns the score of a node based on
-// the node's score attribute
-// returns null if no score set
-function getScore($node) {
- return parseFloat($node.attr('score')) || null;
-}
-
-// return 1 for every comma in text
-function scoreCommas(text) {
- return (text.match(/,/g) || []).length;
-}
-
-var idkRe = new RegExp('^(p|pre)$', 'i');
-
-function scoreLength(textLength) {
- var tagName = arguments.length <= 1 || arguments[1] === undefined ? 'p' : arguments[1];
-
- var chunks = textLength / 50;
-
- if (chunks > 0) {
- var lengthBonus = void 0;
-
- // No idea why p or pre are being tamped down here
- // but just following the source for now
- // Not even sure why tagName is included here,
- // since this is only being called from the context
- // of scoreParagraph
- if (idkRe.test(tagName)) {
- lengthBonus = chunks - 2;
- } else {
- lengthBonus = chunks - 1.25;
- }
-
- return Math.min(Math.max(lengthBonus, 0), 3);
- }
-
- return 0;
-}
-
-// Score a paragraph using various methods. Things like number of
-// commas, etc. Higher is better.
-function scoreParagraph(node) {
- var score = 1;
- var text = node.text().trim();
- var textLength = text.length;
-
- // If this paragraph is less than 25 characters, don't count it.
- if (textLength < 25) {
- return 0;
- }
-
- // Add points for any commas within this paragraph
- score += scoreCommas(text);
-
- // For every 50 characters in this paragraph, add another point. Up
- // to 3 points.
- score += scoreLength(textLength);
-
- // Articles can end with short paragraphs when people are being clever
- // but they can also end with short paragraphs setting up lists of junk
- // that we strip. This negative tweaks junk setup paragraphs just below
- // the cutoff threshold.
- if (text.slice(-1) === ':') {
- score -= 1;
- }
-
- return score;
-}
-
-function setScore($node, $, score) {
- $node.attr('score', score);
- return $node;
-}
-
-function addScore($node, $, amount) {
- try {
- var score = getOrInitScore($node, $) + amount;
- setScore($node, $, score);
- } catch (e) {
- // Ignoring; error occurs in scoreNode
- }
-
- return $node;
-}
-
-// Adds 1/4 of a child's score to its parent
-function addToParent(node, $, score) {
- var parent = node.parent();
- if (parent) {
- addScore(parent, $, score * 0.25);
- }
-
- return node;
-}
-
-// gets and returns the score if it exists
-// if not, initializes a score based on
-// the node's tag type
-function getOrInitScore($node, $) {
- var weightNodes = arguments.length <= 2 || arguments[2] === undefined ? true : arguments[2];
-
- var score = getScore($node);
-
- if (score) {
- return score;
- }
-
- score = scoreNode($node);
-
- if (weightNodes) {
- score += getWeight($node);
- }
-
- addToParent($node, $, score);
-
- return score;
-}
-
-// Score an individual node. Has some smarts for paragraphs, otherwise
-// just scores based on tag.
-function scoreNode($node) {
- var _$node$get = $node.get(0);
-
- var tagName = _$node$get.tagName;
-
- // TODO: Consider ordering by most likely.
- // E.g., if divs are a more common tag on a page,
- // Could save doing that regex test on every node – AP
-
- if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
- return scoreParagraph($node);
- } else if (tagName === 'div') {
- return 5;
- } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {
- return 3;
- } else if (BAD_TAGS$1.test(tagName)) {
- return -3;
- } else if (tagName === 'th') {
- return -5;
- }
-
- return 0;
-}
-
-function convertSpans$1($node, $) {
- if ($node.get(0)) {
- var _$node$get = $node.get(0);
-
- var tagName = _$node$get.tagName;
-
-
- if (tagName === 'span') {
- // convert spans to divs
- convertNodeTo($node, $, 'div');
- }
- }
-}
-
-function addScoreTo($node, $, score) {
- if ($node) {
- convertSpans$1($node, $);
- addScore($node, $, score);
- }
-}
-
-function scorePs($, weightNodes) {
- $('p, pre').not('[score]').each(function (index, node) {
- // The raw score for this paragraph, before we add any parent/child
- // scores.
- var $node = $(node);
- $node = setScore($node, $, getOrInitScore($node, $, weightNodes));
-
- var $parent = $node.parent();
- var rawScore = scoreNode($node);
-
- addScoreTo($parent, $, rawScore, weightNodes);
- if ($parent) {
- // Add half of the individual content score to the
- // grandparent
- addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);
- }
- });
-
- return $;
-}
-
-// score content. Parents get the full value of their children's
-// content score, grandparents half
-function scoreContent($) {
- var weightNodes = arguments.length <= 1 || arguments[1] === undefined ? true : arguments[1];
-
- // First, look for special hNews based selectors and give them a big
- // boost, if they exist
- HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {
- var _ref2 = slicedToArray(_ref, 2);
-
- var parentSelector = _ref2[0];
- var childSelector = _ref2[1];
-
- $(parentSelector + ' ' + childSelector).each(function (index, node) {
- addScore($(node).parent(parentSelector), $, 80);
- });
- });
-
- // Doubling this again
- // Previous solution caused a bug
- // in which parents weren't retaining
- // scores. This is not ideal, and
- // should be fixed.
- scorePs($, weightNodes);
- scorePs($, weightNodes);
-
- return $;
-}
-
-var NORMALIZE_RE = /\s{2,}/g;
-
-function normalizeSpaces(text) {
- return text.replace(NORMALIZE_RE, ' ').trim();
-}
-
-// Given a node type to search for, and a list of regular expressions,
-// look to see if this extraction can be found in the URL. Expects
-// that each expression in r_list will return group(1) as the proper
-// string to be cleaned.
-// Only used for date_published currently.
-function extractFromUrl(url, regexList) {
- var matchRe = regexList.find(function (re) {
- return re.test(url);
- });
- if (matchRe) {
- return matchRe.exec(url)[1];
- }
-
- return null;
-}
-
-// An expression that looks to try to find the page digit within a URL, if
-// it exists.
-// Matches:
-// page=1
-// pg=1
-// p=1
-// paging=12
-// pag=7
-// pagination/1
-// paging/88
-// pa/83
-// p/11
-//
-// Does not match:
-// pg=102
-// page:2
-var PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
-
-var HAS_ALPHA_RE = /[a-z]/i;
-
-var IS_ALPHA_RE = /^[a-z]+$/i;
-var IS_DIGIT_RE = /^[0-9]+$/i;
-
-function pageNumFromUrl(url) {
- var matches = url.match(PAGE_IN_HREF_RE);
- if (!matches) return null;
-
- var pageNum = parseInt(matches[6], 10);
-
- // Return pageNum < 100, otherwise
- // return null
- return pageNum < 100 ? pageNum : null;
-}
-
-function removeAnchor(url) {
- return url.split('#')[0].replace(/\/$/, '');
-}
-
-function isGoodSegment(segment, index, firstSegmentHasLetters) {
- var goodSegment = true;
-
- // If this is purely a number, and it's the first or second
- // url_segment, it's probably a page number. Remove it.
- if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {
- goodSegment = true;
- }
-
- // If this is the first url_segment and it's just "index",
- // remove it
- if (index === 0 && segment.toLowerCase() === 'index') {
- goodSegment = false;
- }
-
- // If our first or second url_segment is smaller than 3 characters,
- // and the first url_segment had no alphas, remove it.
- if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
- goodSegment = false;
- }
-
- return goodSegment;
-}
-
-// Take a URL, and return the article base of said URL. That is, no
-// pagination data exists in it. Useful for comparing to other links
-// that might have pagination data within them.
-function articleBaseUrl(url, parsed) {
- var parsedUrl = parsed || URL.parse(url);
- var protocol = parsedUrl.protocol;
- var host = parsedUrl.host;
- var path = parsedUrl.path;
-
-
- var firstSegmentHasLetters = false;
- var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {
- var segment = rawSegment;
-
- // Split off and save anything that looks like a file type.
- if (segment.includes('.')) {
- var _segment$split = segment.split('.');
-
- var _segment$split2 = slicedToArray(_segment$split, 2);
-
- var possibleSegment = _segment$split2[0];
- var fileExt = _segment$split2[1];
-
- if (IS_ALPHA_RE.test(fileExt)) {
- segment = possibleSegment;
- }
- }
-
- // If our first or second segment has anything looking like a page
- // number, remove it.
- if (PAGE_IN_HREF_RE.test(segment) && index < 2) {
- segment = segment.replace(PAGE_IN_HREF_RE, '');
- }
-
- // If we're on the first segment, check to see if we have any
- // characters in it. The first segment is actually the last bit of
- // the URL, and this will be helpful to determine if we're on a URL
- // segment that looks like "/2/" for example.
- if (index === 0) {
- firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);
- }
-
- // If it's not marked for deletion, push it to cleaned_segments.
- if (isGoodSegment(segment, index, firstSegmentHasLetters)) {
- acc.push(segment);
- }
-
- return acc;
- }, []);
-
- return protocol + '//' + host + cleanedSegments.reverse().join('/');
-}
-
-// Given a string, return True if it appears to have an ending sentence
-// within it, false otherwise.
-var SENTENCE_END_RE = new RegExp('.( |$)');
-function hasSentenceEnd(text) {
- return SENTENCE_END_RE.test(text);
-}
-
-// Now that we have a top_candidate, look through the siblings of
-// it to see if any of them are decently scored. If they are, they
-// may be split parts of the content (Like two divs, a preamble and
-// a body.) Example:
-// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
-function mergeSiblings($candidate, topScore, $) {
- if (!$candidate.parent().length) {
- return $candidate;
- }
-
- var siblingScoreThreshold = Math.max(10, topScore * 0.25);
- var wrappingDiv = $('');
-
- $candidate.parent().children().each(function (index, sibling) {
- var $sibling = $(sibling);
- // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
- return null;
- }
-
- var siblingScore = getScore($sibling);
- if (siblingScore) {
- if ($sibling === $candidate) {
- wrappingDiv.append($sibling);
- } else {
- var contentBonus = 0;
- var density = linkDensity($sibling);
-
- // If sibling has a very low link density,
- // give it a small bonus
- if (density < 0.05) {
- contentBonus += 20;
- }
-
- // If sibling has a high link density,
- // give it a penalty
- if (density >= 0.5) {
- contentBonus -= 20;
- }
-
- // If sibling node has the same class as
- // candidate, give it a bonus
- if ($sibling.attr('class') === $candidate.attr('class')) {
- contentBonus += topScore * 0.2;
- }
-
- var newScore = siblingScore + contentBonus;
-
- if (newScore >= siblingScoreThreshold) {
- return wrappingDiv.append($sibling);
- } else if (sibling.tagName === 'p') {
- var siblingContent = $sibling.text();
- var siblingContentLength = textLength(siblingContent);
-
- if (siblingContentLength > 80 && density < 0.25) {
- return wrappingDiv.append($sibling);
- } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
- return wrappingDiv.append($sibling);
- }
- }
- }
- }
-
- return null;
- });
-
- return wrappingDiv;
-}
-
-// After we've calculated scores, loop through all of the possible
-// candidate nodes we found and find the one with the highest score.
-function findTopCandidate($) {
- var $candidate = void 0;
- var topScore = 0;
-
- $('[score]').each(function (index, node) {
- // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
- return;
- }
-
- var $node = $(node);
- var score = getScore($node);
-
- if (score > topScore) {
- topScore = score;
- $candidate = $node;
- }
- });
-
- // If we don't have a candidate, return the body
- // or whatever the first element is
- if (!$candidate) {
- return $('body') || $('*').first();
- }
-
- $candidate = mergeSiblings($candidate, topScore, $);
-
- return $candidate;
-}
-
-function removeUnlessContent($node, $, weight) {
- // Explicitly save entry-content-asset tags, which are
- // noted as valuable in the Publisher guidelines. For now
- // this works everywhere. We may want to consider making
- // this less of a sure-thing later.
- if ($node.hasClass('entry-content-asset')) {
- return;
- }
-
- var content = normalizeSpaces($node.text());
-
- if (scoreCommas(content) < 10) {
- var pCount = $('p', $node).length;
- var inputCount = $('input', $node).length;
-
- // Looks like a form, too many inputs.
- if (inputCount > pCount / 3) {
- $node.remove();
- return;
- }
-
- var contentLength = content.length;
- var imgCount = $('img', $node).length;
-
- // Content is too short, and there are no images, so
- // this is probably junk content.
- if (contentLength < 25 && imgCount === 0) {
- $node.remove();
- return;
- }
-
- var density = linkDensity($node);
-
- // Too high of link density, is probably a menu or
- // something similar.
- // console.log(weight, density, contentLength)
- if (weight < 25 && density > 0.2 && contentLength > 75) {
- $node.remove();
- return;
- }
-
- // Too high of a link density, despite the score being
- // high.
- if (weight >= 25 && density > 0.5) {
- // Don't remove the node if it's a list and the
- // previous sibling starts with a colon though. That
- // means it's probably content.
- var tagName = $node.get(0).tagName;
- var nodeIsList = tagName === 'ol' || tagName === 'ul';
- if (nodeIsList) {
- var previousNode = $node.prev();
- if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
- return;
- }
- }
-
- $node.remove();
- return;
- }
-
- var scriptCount = $('script', $node).length;
-
- // Too many script tags, not enough content.
- if (scriptCount > 0 && contentLength < 150) {
- $node.remove();
- return;
- }
- }
-}
-
-// Given an article, clean it of some superfluous content specified by
-// tags. Things like forms, ads, etc.
-//
-// Tags is an array of tag name's to search through. (like div, form,
-// etc)
-//
-// Return this same doc.
-function cleanTags($article, $) {
- $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
- var $node = $(node);
- var weight = getScore($node);
- if (!weight) {
- weight = getOrInitScore($node, $);
- setScore($node, $, weight);
- }
-
- // drop node if its weight is < 0
- if (weight < 0) {
- $node.remove();
- } else {
- // deteremine if node seems like content
- removeUnlessContent($node, $, weight);
- }
- });
-
- return $;
-}
-
-function cleanHeaders($article, $) {
- var title = arguments.length <= 2 || arguments[2] === undefined ? '' : arguments[2];
-
- $(HEADER_TAG_LIST, $article).each(function (index, header) {
- var $header = $(header);
- // Remove any headers that appear before all other p tags in the
- // document. This probably means that it was part of the title, a
- // subtitle or something else extraneous like a datestamp or byline,
- // all of which should be handled by other metadata handling.
- if ($($header, $article).prevAll('p').length === 0) {
- return $header.remove();
- }
-
- // Remove any headers that match the title exactly.
- if (normalizeSpaces($(header).text()) === title) {
- return $header.remove();
- }
-
- // If this header has a negative weight, it's probably junk.
- // Get rid of it.
- if (getWeight($(header)) < 0) {
- return $header.remove();
- }
-
- return $header;
- });
-
- return $;
-}
-
-// Rewrite the tag name to div if it's a top level node like body or
-// html to avoid later complications with multiple body tags.
-
-function rewriteTopLevel(article, $) {
- // I'm not using context here because
- // it's problematic when converting the
- // top-level/root node - AP
- $ = convertNodeTo($('html'), $, 'div');
- $ = convertNodeTo($('body'), $, 'div');
-
- return $;
-}
-
-function absolutize($, rootUrl, attr, $content) {
- $('[' + attr + ']', $content).each(function (_, node) {
- var url = node.attribs[attr];
- var absoluteUrl = URL.resolve(rootUrl, url);
-
- node.attribs[attr] = absoluteUrl;
- });
-}
-
-function makeLinksAbsolute($content, $, url) {
- ['href', 'src'].forEach(function (attr) {
- return absolutize($, url, attr, $content);
- });
-
- return $content;
-}
-
-function textLength(text) {
- return text.trim().replace(/\s+/g, ' ').length;
-}
-
-// Determines what percentage of the text
-// in a node is link text
-// Takes a node, returns a float
-function linkDensity($node) {
- var totalTextLength = textLength($node.text());
-
- var linkText = $node.find('a').text();
- var linkLength = textLength(linkText);
-
- if (totalTextLength > 0) {
- return linkLength / totalTextLength;
- } else if (totalTextLength === 0 && linkLength > 0) {
- return 1;
- }
-
- return 0;
-}
-
-// Given a node type to search for, and a list of meta tag names to
-// search for, find a meta tag associated.
-
-function extractFromMeta($, metaNames, cachedNames) {
- var cleanTags = arguments.length <= 3 || arguments[3] === undefined ? true : arguments[3];
-
- var foundNames = metaNames.filter(function (name) {
- return cachedNames.indexOf(name) !== -1;
- });
-
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
-
- try {
- var _loop = function _loop() {
- var name = _step.value;
-
- var type = 'name';
- var value = 'value';
-
- var nodes = $('meta[' + type + '="' + name + '"]');
-
- // Get the unique value of every matching node, in case there
- // are two meta tags with the same name and value.
- // Remove empty values.
- var values = nodes.map(function (index, node) {
- return $(node).attr(value);
- }).toArray().filter(function (text) {
- return text !== '';
- });
-
- // If we have more than one value for the same name, we have a
- // conflict and can't trust any of them. Skip this name. If we have
- // zero, that means our meta tags had no values. Skip this name
- // also.
- if (values.length === 1) {
- var metaValue = void 0;
- // Meta values that contain HTML should be stripped, as they
- // weren't subject to cleaning previously.
- if (cleanTags) {
- metaValue = stripTags(values[0], $);
- } else {
- metaValue = values[0];
- }
-
- return {
- v: metaValue
- };
- }
- };
-
- for (var _iterator = foundNames[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var _ret = _loop();
-
- if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
- }
-
- // If nothing is found, return null
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
-
- return null;
-}
-
-function isGoodNode($node, maxChildren) {
- // If it has a number of children, it's more likely a container
- // element. Skip it.
- if ($node.children().length > maxChildren) {
- return false;
- }
- // If it looks to be within a comment, skip it.
- if (withinComment($node)) {
- return false;
- }
-
- return true;
-}
-
-// Given a a list of selectors find content that may
-// be extractable from the document. This is for flat
-// meta-information, like author, title, date published, etc.
-function extractFromSelectors($, selectors) {
- var maxChildren = arguments.length <= 2 || arguments[2] === undefined ? 1 : arguments[2];
- var textOnly = arguments.length <= 3 || arguments[3] === undefined ? true : arguments[3];
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
-
- try {
- for (var _iterator = selectors[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var selector = _step.value;
-
- var nodes = $(selector);
-
- // If we didn't get exactly one of this selector, this may be
- // a list of articles or comments. Skip it.
- if (nodes.length === 1) {
- var $node = $(nodes[0]);
-
- if (isGoodNode($node, maxChildren)) {
- var content = void 0;
- if (textOnly) {
- content = $node.text();
- } else {
- content = $node.html();
- }
-
- if (content) {
- return content;
- }
- }
- }
- }
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
-
- return null;
-}
-
-// strips all tags from a string of text
-function stripTags(text, $) {
- // Wrapping text in html element prevents errors when text
- // has no html
- var cleanText = $('' + text + '').text();
- return cleanText === '' ? text : cleanText;
-}
-
-function withinComment($node) {
- var parents = $node.parents().toArray();
- var commentParent = parents.find(function (parent) {
- var classAndId = parent.attribs.class + ' ' + parent.attribs.id;
- return classAndId.includes('comment');
- });
-
- return commentParent !== undefined;
-}
-
-// Given a node, determine if it's article-like enough to return
-// param: node (a cheerio node)
-// return: boolean
-
-function nodeIsSufficient($node) {
- return $node.text().trim().length >= 100;
-}
-
-function isWordpress($) {
- return $(IS_WP_SELECTOR).length > 0;
-}
-
-// CLEAN AUTHOR CONSTANTS
-var CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
-// author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
-
-// CLEAN DEK CONSTANTS
-var TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
-// CLEAN DATE PUBLISHED CONSTANTS
-var MS_DATE_STRING = /^\d{13}$/i;
-var SEC_DATE_STRING = /^\d{10}$/i;
-var CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i;
-var TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i;
-var TIME_MERIDIAN_DOTS_RE = /\.m\./i;
-var months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'];
-var allMonths = months.join('|');
-var timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';
-var timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
-var SPLIT_DATE_STRING = new RegExp('(' + timestamp1 + ')|(' + timestamp2 + ')|([0-9]{1,4})|(' + allMonths + ')', 'ig');
-
-// CLEAN TITLE CONSTANTS
-// A regular expression that will match separating characters on a
-// title, that usually denote breadcrumbs or something similar.
-var TITLE_SPLITTERS_RE = /(: | - | \| )/g;
-
-var DOMAIN_ENDINGS_RE = new RegExp('.com$|.net$|.org$|.co.uk$', 'g');
-
-// Take an author string (like 'By David Smith ') and clean it to
-// just the name(s): 'David Smith'.
-function cleanAuthor(author) {
- return author.replace(CLEAN_AUTHOR_RE, '$2').trim();
-}
-
-function clean$1(leadImageUrl) {
- leadImageUrl = leadImageUrl.trim();
- if (validUrl.isWebUri(leadImageUrl)) {
- return leadImageUrl;
- }
-
- return null;
-}
-
-// Take a dek HTML fragment, and return the cleaned version of it.
-// Return None if the dek wasn't good enough.
-function cleanDek(dek, _ref) {
- var $ = _ref.$;
-
- // Sanity check that we didn't get too short or long of a dek.
- if (dek.length > 1000 || dek.length < 5) return null;
-
- var dekText = stripTags(dek, $);
-
- // Plain text links shouldn't exist in the dek. If we have some, it's
- // not a good dek - bail.
- if (TEXT_LINK_RE.test(dekText)) return null;
-
- return dekText.trim();
-}
-
-// Is there a compelling reason to use moment here?
-// Mostly only being used for the isValid() method,
-// but could just check for 'Invalid Date' string.
-
-function cleanDateString(dateString) {
- return (dateString.match(SPLIT_DATE_STRING) || []).join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm').replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3').replace(CLEAN_DATE_STRING_RE, '$1').trim();
-}
-
-// Take a date published string, and hopefully return a date out of
-// it. Return none if we fail.
-function cleanDatePublished(dateString) {
- // If string is in milliseconds or seconds, convert to int
- if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {
- dateString = parseInt(dateString, 10);
- }
-
- var date = moment(new Date(dateString));
-
- if (!date.isValid()) {
- dateString = cleanDateString(dateString);
- date = moment(new Date(dateString));
- }
-
- return date.isValid() ? date.toISOString() : null;
-}
-
-// Clean our article content, returning a new, cleaned node.
-
-function extractCleanNode(article, _ref) {
- var $ = _ref.$;
- var _ref$cleanConditional = _ref.cleanConditionally;
- var cleanConditionally = _ref$cleanConditional === undefined ? true : _ref$cleanConditional;
- var _ref$title = _ref.title;
- var title = _ref$title === undefined ? '' : _ref$title;
- var _ref$url = _ref.url;
- var url = _ref$url === undefined ? '' : _ref$url;
-
- // Rewrite the tag name to div if it's a top level node like body or
- // html to avoid later complications with multiple body tags.
- rewriteTopLevel(article, $);
-
- // Drop small images and spacer images
- cleanImages(article, $);
-
- // Drop certain tags like , etc
- // This is -mostly- for cleanliness, not security.
- stripJunkTags(article, $);
-
- // H1 tags are typically the article title, which should be extracted
- // by the title extractor instead. If there's less than 3 of them (<3),
- // strip them. Otherwise, turn 'em into H2s.
- cleanHOnes(article, $);
-
- // Clean headers
- cleanHeaders(article, $, title);
-
- // Make links absolute
- makeLinksAbsolute(article, $, url);
-
- // Remove unnecessary attributes
- cleanAttributes(article);
-
- // We used to clean UL's and OL's here, but it was leading to
- // too many in-article lists being removed. Consider a better
- // way to detect menus particularly and remove them.
- cleanTags(article, $, cleanConditionally);
-
- // Remove empty paragraph nodes
- removeEmpty(article, $);
-
- return article;
-}
-
-function cleanTitle(title, _ref) {
- var url = _ref.url;
- var $ = _ref.$;
-
- // If title has |, :, or - in it, see if
- // we can clean it up.
- if (TITLE_SPLITTERS_RE.test(title)) {
- title = resolveSplitTitle(title, url);
- }
-
- // Final sanity check that we didn't get a crazy title.
- // if (title.length > 150 || title.length < 15) {
- if (title.length > 150) {
- // If we did, return h1 from the document if it exists
- var h1 = $('h1');
- if (h1.length === 1) {
- title = h1.text();
- }
- }
-
- // strip any html tags in the title text
- return stripTags(title, $).trim();
-}
-
-function extractBreadcrumbTitle(splitTitle, text) {
- // This must be a very breadcrumbed title, like:
- // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com
- // NYTimes - Blogs - Bits - The Best Gadgets on Earth
- if (splitTitle.length >= 6) {
- var _ret = function () {
- // Look to see if we can find a breadcrumb splitter that happens
- // more than once. If we can, we'll be able to better pull out
- // the title.
- var termCounts = splitTitle.reduce(function (acc, titleText) {
- acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
- return acc;
- }, {});
-
- var _Reflect$ownKeys$redu = Reflect.ownKeys(termCounts).reduce(function (acc, key) {
- if (acc[1] < termCounts[key]) {
- return [key, termCounts[key]];
- }
-
- return acc;
- }, [0, 0]);
-
- var _Reflect$ownKeys$redu2 = slicedToArray(_Reflect$ownKeys$redu, 2);
-
- var maxTerm = _Reflect$ownKeys$redu2[0];
- var termCount = _Reflect$ownKeys$redu2[1];
-
- // We found a splitter that was used more than once, so it
- // is probably the breadcrumber. Split our title on that instead.
- // Note: max_term should be <= 4 characters, so that " >> "
- // will match, but nothing longer than that.
-
- if (termCount >= 2 && maxTerm.length <= 4) {
- splitTitle = text.split(maxTerm);
- }
-
- var splitEnds = [splitTitle[0], splitTitle.slice(-1)];
- var longestEnd = splitEnds.reduce(function (acc, end) {
- return acc.length > end.length ? acc : end;
- }, '');
-
- if (longestEnd.length > 10) {
- return {
- v: longestEnd
- };
- }
-
- return {
- v: text
- };
- }();
-
- if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
- }
-
- return null;
-}
-
-function cleanDomainFromTitle(splitTitle, url) {
- // Search the ends of the title, looking for bits that fuzzy match
- // the URL too closely. If one is found, discard it and return the
- // rest.
- //
- // Strip out the big TLDs - it just makes the matching a bit more
- // accurate. Not the end of the world if it doesn't strip right.
- var _URL$parse = URL.parse(url);
-
- var host = _URL$parse.host;
-
- var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
-
- var startSlug = splitTitle[0].toLowerCase().replace(' ', '');
- var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
-
- if (startSlugRatio > 0.4 && startSlug.length > 5) {
- return splitTitle.slice(2).join('');
- }
-
- var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
- var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
-
- if (endSlugRatio > 0.4 && endSlug.length >= 5) {
- return splitTitle.slice(0, -2).join('');
- }
-
- return null;
-}
-
-// Given a title with separators in it (colons, dashes, etc),
-// resolve whether any of the segments should be removed.
-function resolveSplitTitle(title) {
- var url = arguments.length <= 1 || arguments[1] === undefined ? '' : arguments[1];
-
- // Splits while preserving splitters, like:
- // ['The New New York', ' - ', 'The Washington Post']
- var splitTitle = title.split(TITLE_SPLITTERS_RE);
- if (splitTitle.length === 1) {
- return title;
- }
-
- var newTitle = extractBreadcrumbTitle(splitTitle, title);
- if (newTitle) return newTitle;
-
- newTitle = cleanDomainFromTitle(splitTitle, url);
- if (newTitle) return newTitle;
-
- // Fuzzy ratio didn't find anything, so this title is probably legit.
- // Just return it all.
- return title;
-}
-
-var Cleaners = {
- author: cleanAuthor,
- lead_image_url: clean$1,
- dek: cleanDek,
- date_published: cleanDatePublished,
- content: extractCleanNode,
- title: cleanTitle
-};
-
-// Using a variety of scoring techniques, extract the content most
-// likely to be article text.
-//
-// If strip_unlikely_candidates is True, remove any elements that
-// match certain criteria first. (Like, does this element have a
-// classname of "comment")
-//
-// If weight_nodes is True, use classNames and IDs to determine the
-// worthiness of nodes.
-//
-// Returns a cheerio object $
-function extractBestNode($, opts) {
- // clone the node so we can get back to our
- // initial parsed state if needed
- // TODO Do I need this? – AP
- // let $root = $.root().clone()
-
-
- if (opts.stripUnlikelyCandidates) {
- $ = stripUnlikelyCandidates($);
- }
-
- $ = convertToParagraphs($);
- $ = scoreContent($, opts.weightNodes);
- var $topCandidate = findTopCandidate($);
-
- return $topCandidate;
-}
-
-var GenericContentExtractor = {
- defaultOpts: {
- stripUnlikelyCandidates: true,
- weightNodes: true,
- cleanConditionally: true
- },
-
- // Extract the content for this resource - initially, pass in our
- // most restrictive opts which will return the highest quality
- // content. On each failure, retry with slightly more lax opts.
- //
- // :param return_type: string. If "node", should return the content
- // as a cheerio node rather than as an HTML string.
- //
- // Opts:
- // stripUnlikelyCandidates: Remove any elements that match
- // non-article-like criteria first.(Like, does this element
- // have a classname of "comment")
- //
- // weightNodes: Modify an elements score based on whether it has
- // certain classNames or IDs. Examples: Subtract if a node has
- // a className of 'comment', Add if a node has an ID of
- // 'entry-content'.
- //
- // cleanConditionally: Clean the node to return of some
- // superfluous content. Things like forms, ads, etc.
- extract: function extract(_ref, opts) {
- var $ = _ref.$;
- var html = _ref.html;
- var title = _ref.title;
- var url = _ref.url;
-
- opts = _extends({}, this.defaultOpts, opts);
-
- $ = $ || cheerio.load(html);
-
- // Cascade through our extraction-specific opts in an ordered fashion,
- // turning them off as we try to extract content.
- var node = this.getContentNode($, title, url, opts);
-
- if (nodeIsSufficient(node)) {
- return this.cleanAndReturnNode(node, $);
- }
-
- // We didn't succeed on first pass, one by one disable our
- // extraction opts and try again.
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
-
- try {
- for (var _iterator = Reflect.ownKeys(opts).filter(function (k) {
- return opts[k] === true;
- })[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var key = _step.value;
-
- opts[key] = false;
- $ = cheerio.load(html);
-
- node = this.getContentNode($, title, url, opts);
-
- if (nodeIsSufficient(node)) {
- break;
- }
- }
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
-
- return this.cleanAndReturnNode(node, $);
- },
-
-
- // Get node given current options
- getContentNode: function getContentNode($, title, url, opts) {
- return extractCleanNode(extractBestNode($, opts), {
- $: $,
- cleanConditionally: opts.cleanConditionally,
- title: title,
- url: url
- });
- },
-
-
- // Once we got here, either we're at our last-resort node, or
- // we broke early. Make sure we at least have -something- before we
- // move forward.
- cleanAndReturnNode: function cleanAndReturnNode(node, $) {
- if (!node) {
- return null;
- }
-
- return normalizeSpaces($.html(node));
-
- // if return_type == "html":
- // return normalize_spaces(node_to_html(node))
- // else:
- // return node
- }
-};
-
-// TODO: It would be great if we could merge the meta and selector lists into
-// a list of objects, because we could then rank them better. For example,
-// .hentry .entry-title is far better suited than .
-
-// An ordered list of meta tag names that denote likely article titles. All
-// attributes should be lowercase for faster case-insensitive matching. From
-// most distinct to least distinct.
-var STRONG_TITLE_META_TAGS = ['tweetmeme-title', 'dc.title', 'rbtitle', 'headline', 'title'];
-
-// og:title is weak because it typically contains context that we don't like,
-// for example the source site's name. Gotta get that brand into facebook!
-var WEAK_TITLE_META_TAGS = ['og:title'];
-
-// An ordered list of XPath Selectors to find likely article titles. From
-// most explicit to least explicit.
-//
-// Note - this does not use classes like CSS. This checks to see if the string
-// exists in the className, which is not as accurate as .className (which
-// splits on spaces/endlines), but for our purposes it's close enough. The
-// speed tradeoff is worth the accuracy hit.
-var STRONG_TITLE_SELECTORS = ['.hentry .entry-title', 'h1#articleHeader', 'h1.articleHeader', 'h1.article', '.instapaper_title', '#meebo-title'];
-
-var WEAK_TITLE_SELECTORS = ['article h1', '#entry-title', '.entry-title', '#entryTitle', '#entrytitle', '.entryTitle', '.entrytitle', '#articleTitle', '.articleTitle', 'post post-title', 'h1.title', 'h2.article', 'h1', 'html head title', 'title'];
-
-var GenericTitleExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var url = _ref.url;
- var metaCache = _ref.metaCache;
-
- // First, check to see if we have a matching meta tag that we can make
- // use of that is strongly associated with the headline.
- var title = void 0;
-
- title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);
- if (title) return cleanTitle(title, { url: url, $: $ });
-
- // Second, look through our content selectors for the most likely
- // article title that is strongly associated with the headline.
- title = extractFromSelectors($, STRONG_TITLE_SELECTORS);
- if (title) return cleanTitle(title, { url: url, $: $ });
-
- // Third, check for weaker meta tags that may match.
- title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);
- if (title) return cleanTitle(title, { url: url, $: $ });
-
- // Last, look for weaker selector tags that may match.
- title = extractFromSelectors($, WEAK_TITLE_SELECTORS);
- if (title) return cleanTitle(title, { url: url, $: $ });
-
- // If no matches, return an empty string
- return '';
- }
-};
-
-// An ordered list of meta tag names that denote likely article authors. All
-// attributes should be lowercase for faster case-insensitive matching. From
-// most distinct to least distinct.
-//
-// Note: "author" is too often the -developer- of the page, so it is not
-// added here.
-var AUTHOR_META_TAGS = ['byl', 'clmst', 'dc.author', 'dcsext.author', 'dc.creator', 'rbauthors', 'authors'];
-
-var AUTHOR_MAX_LENGTH = 300;
-
-// An ordered list of XPath Selectors to find likely article authors. From
-// most explicit to least explicit.
-//
-// Note - this does not use classes like CSS. This checks to see if the string
-// exists in the className, which is not as accurate as .className (which
-// splits on spaces/endlines), but for our purposes it's close enough. The
-// speed tradeoff is worth the accuracy hit.
-var AUTHOR_SELECTORS = ['.entry .entry-author', '.author.vcard .fn', '.author .vcard .fn', '.byline.vcard .fn', '.byline .vcard .fn', '.byline .by .author', '.byline .by', '.byline .author', '.post-author.vcard', '.post-author .vcard', 'a[rel=author]', '#by_author', '.by_author', '#entryAuthor', '.entryAuthor', '.byline a[href*=author]', '#author .authorname', '.author .authorname', '#author', '.author', '.articleauthor', '.ArticleAuthor', '.byline'];
-
-// An ordered list of Selectors to find likely article authors, with
-// regular expression for content.
-var bylineRe = /^[\n\s]*By/i;
-var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
-
-var GenericAuthorExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var metaCache = _ref.metaCache;
-
- var author = void 0;
-
- // First, check to see if we have a matching
- // meta tag that we can make use of.
- author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);
- if (author && author.length < AUTHOR_MAX_LENGTH) {
- return cleanAuthor(author);
- }
-
- // Second, look through our selectors looking for potential authors.
- author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
- if (author && author.length < AUTHOR_MAX_LENGTH) {
- return cleanAuthor(author);
- }
-
- // Last, use our looser regular-expression based selectors for
- // potential authors.
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
-
- try {
- for (var _iterator = BYLINE_SELECTORS_RE[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var _ref4 = _step.value;
-
- var _ref3 = slicedToArray(_ref4, 2);
-
- var selector = _ref3[0];
- var regex = _ref3[1];
-
- var node = $(selector);
- if (node.length === 1) {
- var text = node.text();
- if (regex.test(text)) {
- return cleanAuthor(text);
- }
- }
- }
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
-
- return null;
- }
-};
-
-// An ordered list of meta tag names that denote
-// likely date published dates. All attributes
-// should be lowercase for faster case-insensitive matching.
-// From most distinct to least distinct.
-var DATE_PUBLISHED_META_TAGS = ['article:published_time', 'displaydate', 'dc.date', 'dc.date.issued', 'rbpubdate', 'publish_date', 'pub_date', 'pagedate', 'pubdate', 'revision_date', 'doc_date', 'date_created', 'content_create_date', 'lastmodified', 'created', 'date'];
-
-// An ordered list of XPath Selectors to find
-// likely date published dates. From most explicit
-// to least explicit.
-var DATE_PUBLISHED_SELECTORS = ['.hentry .dtstamp.published', '.hentry .published', '.hentry .dtstamp.updated', '.hentry .updated', '.single .published', '.meta .published', '.meta .postDate', '.entry-date', '.byline .date', '.postmetadata .date', '.article_datetime', '.date-header', '.story-date', '.dateStamp', '#story .datetime', '.dateline', '.pubdate'];
-
-// An ordered list of compiled regular expressions to find likely date
-// published dates from the URL. These should always have the first
-// reference be a date string that is parseable by dateutil.parser.parse
-var abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';
-var DATE_PUBLISHED_URL_RES = [
-// /2012/01/27/ but not /2012/01/293
-new RegExp('/(20\\d{2}/\\d{2}/\\d{2})/', 'i'),
-// 20120127 or 20120127T but not 2012012733 or 8201201733
-// /[^0-9](20\d{2}[01]\d[0-3]\d)([^0-9]|$)/i,
-// 2012-01-27
-new RegExp('(20\\d{2}-[01]\\d-[0-3]\\d)', 'i'),
-// /2012/jan/27/
-new RegExp('/(20\\d{2}/' + abbrevMonthsStr + '/[0-3]\\d)/', 'i')];
-
-var GenericDatePublishedExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var url = _ref.url;
- var metaCache = _ref.metaCache;
-
- var datePublished = void 0;
- // First, check to see if we have a matching meta tag
- // that we can make use of.
- // Don't try cleaning tags from this string
- datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);
- if (datePublished) return cleanDatePublished(datePublished);
-
- // Second, look through our selectors looking for potential
- // date_published's.
- datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);
- if (datePublished) return cleanDatePublished(datePublished);
-
- // Lastly, look to see if a dately string exists in the URL
- datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);
- if (datePublished) return cleanDatePublished(datePublished);
-
- return null;
- }
-};
-
-// import {
-// DEK_META_TAGS,
-// DEK_SELECTORS,
-// DEK_URL_RES,
-// } from './constants';
-
-// import { cleanDek } from 'cleaners';
-
-// import {
-// extractFromMeta,
-// extractFromSelectors,
-// } from 'utils/dom';
-
-// Currently there is only one selector for
-// deks. We should simply return null here
-// until we have a more robust generic option.
-// Below is the original source for this, for reference.
-var GenericDekExtractor = {
- // extract({ $, content, metaCache }) {
- extract: function extract() {
- return null;
- }
-};
-
-// An ordered list of meta tag names that denote likely article leading images.
-// All attributes should be lowercase for faster case-insensitive matching.
-// From most distinct to least distinct.
-var LEAD_IMAGE_URL_META_TAGS = ['og:image', 'twitter:image', 'image_src'];
-
-var LEAD_IMAGE_URL_SELECTORS = ['link[rel=image_src]'];
-
-var POSITIVE_LEAD_IMAGE_URL_HINTS = ['upload', 'wp-content', 'large', 'photo', 'wp-image'];
-var POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
-
-var NEGATIVE_LEAD_IMAGE_URL_HINTS = ['spacer', 'sprite', 'blank', 'throbber', 'gradient', 'tile', 'bg', 'background', 'icon', 'social', 'header', 'hdr', 'advert', 'spinner', 'loader', 'loading', 'default', 'rating', 'share', 'facebook', 'twitter', 'theme', 'promo', 'ads', 'wp-includes'];
-var NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');
-
-var GIF_RE = /\.gif(\?.*)?$/i;
-var JPG_RE = /\.jpe?g(\?.*)?$/i;
-
-function getSig($node) {
- return ($node.attr('class') || '') + ' ' + ($node.attr('id') || '');
-}
-
-// Scores image urls based on a variety of heuristics.
-function scoreImageUrl(url) {
- url = url.trim();
- var score = 0;
-
- if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
- score += 20;
- }
-
- if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {
- score -= 20;
- }
-
- // TODO: We might want to consider removing this as
- // gifs are much more common/popular than they once were
- if (GIF_RE.test(url)) {
- score -= 10;
- }
-
- if (JPG_RE.test(url)) {
- score += 10;
- }
-
- // PNGs are neutral.
-
- return score;
-}
-
-// Alt attribute usually means non-presentational image.
-function scoreAttr($img) {
- if ($img.attr('alt')) {
- return 5;
- }
-
- return 0;
-}
-
-// Look through our parent and grandparent for figure-like
-// container elements, give a bonus if we find them
-function scoreByParents($img) {
- var score = 0;
- var $figParent = $img.parents('figure').first();
-
- if ($figParent.length === 1) {
- score += 25;
- }
-
- var $parent = $img.parent();
- var $gParent = void 0;
- if ($parent.length === 1) {
- $gParent = $parent.parent();
- }
-
- [$parent, $gParent].forEach(function ($node) {
- if (PHOTO_HINTS_RE$1.test(getSig($node))) {
- score += 15;
- }
- });
-
- return score;
-}
-
-// Look at our immediate sibling and see if it looks like it's a
-// caption. Bonus if so.
-function scoreBySibling($img) {
- var score = 0;
- var $sibling = $img.next();
- var sibling = $sibling.get(0);
-
- if (sibling && sibling.tagName === 'figcaption') {
- score += 25;
- }
-
- if (PHOTO_HINTS_RE$1.test(getSig($sibling))) {
- score += 15;
- }
-
- return score;
-}
-
-function scoreByDimensions($img) {
- var score = 0;
-
- var width = parseFloat($img.attr('width'));
- var height = parseFloat($img.attr('height'));
- var src = $img.attr('src');
-
- // Penalty for skinny images
- if (width && width <= 50) {
- score -= 50;
- }
-
- // Penalty for short images
- if (height && height <= 50) {
- score -= 50;
- }
-
- if (width && height && !src.includes('sprite')) {
- var area = width * height;
- if (area < 5000) {
- // Smaller than 50 x 100
- score -= 100;
- } else {
- score += Math.round(area / 1000);
- }
- }
-
- return score;
-}
-
-function scoreByPosition($imgs, index) {
- return $imgs.length / 2 - index;
-}
-
-// Given a resource, try to find the lead image URL from within
-// it. Like content and next page extraction, uses a scoring system
-// to determine what the most likely image may be. Short circuits
-// on really probable things like og:image meta tags.
-//
-// Potential signals to still take advantage of:
-// * domain
-// * weird aspect ratio
-var GenericLeadImageUrlExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var content = _ref.content;
- var metaCache = _ref.metaCache;
-
- var cleanUrl = void 0;
-
- // Check to see if we have a matching meta tag that we can make use of.
- // Moving this higher because common practice is now to use large
- // images on things like Open Graph or Twitter cards.
- // images usually have for things like Open Graph.
- var imageUrl = extractFromMeta($, LEAD_IMAGE_URL_META_TAGS, metaCache, false);
-
- if (imageUrl) {
- cleanUrl = clean$1(imageUrl);
-
- if (cleanUrl) return cleanUrl;
- }
-
- // Next, try to find the "best" image via the content.
- // We'd rather not have to fetch each image and check dimensions,
- // so try to do some analysis and determine them instead.
- var imgs = $('img', content).toArray();
- var imgScores = {};
-
- imgs.forEach(function (img, index) {
- var $img = $(img);
- var src = $img.attr('src');
-
- if (!src) return;
-
- var score = scoreImageUrl(src);
- score += scoreAttr($img);
- score += scoreByParents($img);
- score += scoreBySibling($img);
- score += scoreByDimensions($img);
- score += scoreByPosition(imgs, index);
-
- imgScores[src] = score;
- });
-
- var _Reflect$ownKeys$redu = Reflect.ownKeys(imgScores).reduce(function (acc, key) {
- return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
- }, [null, 0]);
-
- var _Reflect$ownKeys$redu2 = slicedToArray(_Reflect$ownKeys$redu, 2);
-
- var topUrl = _Reflect$ownKeys$redu2[0];
- var topScore = _Reflect$ownKeys$redu2[1];
-
-
- if (topScore > 0) {
- cleanUrl = clean$1(topUrl);
-
- if (cleanUrl) return cleanUrl;
- }
-
- // If nothing else worked, check to see if there are any really
- // probable nodes in the doc, like .
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
-
- try {
- for (var _iterator = LEAD_IMAGE_URL_SELECTORS[Symbol.iterator](), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var selector = _step.value;
-
- var $node = $(selector).first();
- var src = $node.attr('src');
- if (src) {
- cleanUrl = clean$1(src);
- if (cleanUrl) return cleanUrl;
- }
-
- var href = $node.attr('href');
- if (href) {
- cleanUrl = clean$1(href);
- if (cleanUrl) return cleanUrl;
- }
-
- var value = $node.attr('value');
- if (value) {
- cleanUrl = clean$1(value);
- if (cleanUrl) return cleanUrl;
- }
- }
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
-
- return null;
- }
-};
-
-// def extract(self):
-// """
-// # First, try to find the "best" image via the content.
-// # We'd rather not have to fetch each image and check dimensions,
-// # so try to do some analysis and determine them instead.
-// content = self.extractor.extract_content(return_type="node")
-// imgs = content.xpath('.//img')
-// img_scores = defaultdict(int)
-// logger.debug('Scoring %d images from content', len(imgs))
-// for (i, img) in enumerate(imgs):
-// img_score = 0
-//
-// if not 'src' in img.attrib:
-// logger.debug('No src attribute found')
-// continue
-//
-// try:
-// parsed_img = urlparse(img.attrib['src'])
-// img_path = parsed_img.path.lower()
-// except ValueError:
-// logger.debug('ValueError getting img path.')
-// continue
-// logger.debug('Image path is %s', img_path)
-//
-// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
-// logger.debug('Positive URL hints match. Adding 20.')
-// img_score += 20
-//
-// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):
-// logger.debug('Negative URL hints match. Subtracting 20.')
-// img_score -= 20
-//
-// # Gifs are more often structure than photos
-// if img_path.endswith('gif'):
-// logger.debug('gif found. Subtracting 10.')
-// img_score -= 10
-//
-// # JPGs are more often photographs
-// if img_path.endswith('jpg'):
-// logger.debug('jpg found. Adding 10.')
-// img_score += 10
-//
-// # PNGs are neutral.
-//
-// # Alt attribute usually means non-presentational image.
-// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:
-// logger.debug('alt attribute found. Adding 5.')
-// img_score += 5
-//
-// # Look through our parent and grandparent for figure-like
-// # container elements, give a bonus if we find them
-// parents = [img.getparent()]
-// if parents[0] is not None and parents[0].getparent() is not None:
-// parents.append(parents[0].getparent())
-// for p in parents:
-// if p.tag == 'figure':
-// logger.debug('Parent with
tag found. Adding 25.')
-// img_score += 25
-//
-// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])
-// if constants.PHOTO_HINTS_RE.search(p_sig):
-// logger.debug('Photo hints regex match. Adding 15.')
-// img_score += 15
-//
-// # Look at our immediate sibling and see if it looks like it's a
-// # caption. Bonus if so.
-// sibling = img.getnext()
-// if sibling is not None:
-// if sibling.tag == 'figcaption':
-// img_score += 25
-//
-// sib_sig = ' '.join([sibling.get('id', ''),
-// sibling.get('class', '')]).lower()
-// if 'caption' in sib_sig:
-// img_score += 15
-//
-// # Pull out width/height if they were set.
-// img_width = None
-// img_height = None
-// if 'width' in img.attrib:
-// try:
-// img_width = float(img.get('width'))
-// except ValueError:
-// pass
-// if 'height' in img.attrib:
-// try:
-// img_height = float(img.get('height'))
-// except ValueError:
-// pass
-//
-// # Penalty for skinny images
-// if img_width and img_width <= 50:
-// logger.debug('Skinny image found. Subtracting 50.')
-// img_score -= 50
-//
-// # Penalty for short images
-// if img_height and img_height <= 50:
-// # Wide, short images are more common than narrow, tall ones
-// logger.debug('Short image found. Subtracting 25.')
-// img_score -= 25
-//
-// if img_width and img_height and not 'sprite' in img_path:
-// area = img_width * img_height
-//
-// if area < 5000: # Smaller than 50x100
-// logger.debug('Image with small area found. Subtracting 100.')
-// img_score -= 100
-// else:
-// img_score += round(area/1000.0)
-//
-// # If the image is higher on the page than other images,
-// # it gets a bonus. Penalty if lower.
-// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)
-// img_score += len(imgs)/2 - i
-//
-// # Use the raw src here because we munged img_path for case
-// # insensitivity
-// logger.debug('Final score is %d.', img_score)
-// img_scores[img.attrib['src']] += img_score
-//
-// top_score = 0
-// top_url = None
-// for (url, score) in img_scores.items():
-// if score > top_score:
-// top_url = url
-// top_score = score
-//
-// if top_score > 0:
-// logger.debug('Using top score image from content. Score was %d', top_score)
-// return top_url
-//
-//
-// # If nothing else worked, check to see if there are any really
-// # probable nodes in the doc, like .
-// logger.debug('Trying to find lead image in probable nodes')
-// for selector in constants.LEAD_IMAGE_URL_SELECTORS:
-// nodes = self.resource.extract_by_selector(selector)
-// for node in nodes:
-// clean_value = None
-// if node.attrib.get('src'):
-// clean_value = self.clean(node.attrib['src'])
-//
-// if not clean_value and node.attrib.get('href'):
-// clean_value = self.clean(node.attrib['href'])
-//
-// if not clean_value and node.attrib.get('value'):
-// clean_value = self.clean(node.attrib['value'])
-//
-// if clean_value:
-// logger.debug('Found lead image in probable nodes.')
-// logger.debug('Node was: %s', node)
-// return clean_value
-//
-// return None
-
-function scoreSimilarity(score, articleUrl, href) {
- // Do this last and only if we have a real candidate, because it's
- // potentially expensive computationally. Compare the link to this
- // URL using difflib to get the % similarity of these URLs. On a
- // sliding scale, subtract points from this link based on
- // similarity.
- if (score > 0) {
- var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();
- // Subtract .1 from diff_percent when calculating modifier,
- // which means that if it's less than 10% different, we give a
- // bonus instead. Ex:
- // 3% different = +17.5 points
- // 10% different = 0 points
- // 20% different = -25 points
- var diffPercent = 1.0 - similarity;
- var diffModifier = -(250 * (diffPercent - 0.2));
- return score + diffModifier;
- }
-
- return 0;
-}
-
-function scoreLinkText(linkText, pageNum) {
- // If the link text can be parsed as a number, give it a minor
- // bonus, with a slight bias towards lower numbered pages. This is
- // so that pages that might not have 'next' in their text can still
- // get scored, and sorted properly by score.
- var score = 0;
-
- if (IS_DIGIT_RE.test(linkText.trim())) {
- var linkTextAsNum = parseInt(linkText, 10);
- // If it's the first page, we already got it on the first call.
- // Give it a negative score. Otherwise, up to page 10, give a
- // small bonus.
- if (linkTextAsNum < 2) {
- score = -30;
- } else {
- score = Math.max(0, 10 - linkTextAsNum);
- }
-
- // If it appears that the current page number is greater than
- // this links page number, it's a very bad sign. Give it a big
- // penalty.
- if (pageNum && pageNum >= linkTextAsNum) {
- score -= 50;
- }
- }
-
- return score;
-}
-
-function scorePageInLink(pageNum, isWp) {
- // page in the link = bonus. Intentionally ignore wordpress because
- // their ?p=123 link style gets caught by this even though it means
- // separate documents entirely.
- if (pageNum && !isWp) {
- return 50;
- }
-
- return 0;
-}
-
-var DIGIT_RE$2 = /\d/;
-
-// A list of words that, if found in link text or URLs, likely mean that
-// this link is not a next page link.
-var EXTRANEOUS_LINK_HINTS$1 = ['print', 'archive', 'comment', 'discuss', 'e-mail', 'email', 'share', 'reply', 'all', 'login', 'sign', 'single', 'adx', 'entry-unrelated'];
-var EXTRANEOUS_LINK_HINTS_RE$1 = new RegExp(EXTRANEOUS_LINK_HINTS$1.join('|'), 'i');
-
-// Match any link text/classname/id that looks like it could mean the next
-// page. Things like: next, continue, >, >>, » but not >|, »| as those can
-// mean last page.
-var NEXT_LINK_TEXT_RE$1 = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');
-
-// Match any link text/classname/id that looks like it is an end link: things
-// like "first", "last", "end", etc.
-var CAP_LINK_TEXT_RE$1 = new RegExp('(first|last|end)', 'i');
-
-// Match any link text/classname/id that looks like it means the previous
-// page.
-var PREV_LINK_TEXT_RE$1 = new RegExp('(prev|earl|old|new|<|«)', 'i');
-
-function scoreExtraneousLinks(href) {
- // If the URL itself contains extraneous values, give a penalty.
- if (EXTRANEOUS_LINK_HINTS_RE$1.test(href)) {
- return -25;
- }
-
- return 0;
-}
-
-function makeSig$1($link) {
- return ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');
-}
-
-function scoreByParents$1($link) {
- // If a parent node contains paging-like classname or id, give a
- // bonus. Additionally, if a parent_node contains bad content
- // (like 'sponsor'), give a penalty.
- var $parent = $link.parent();
- var positiveMatch = false;
- var negativeMatch = false;
- var score = 0;
-
- Array.from(range(0, 4)).forEach(function () {
- if ($parent.length === 0) {
- return;
- }
-
- var parentData = makeSig$1($parent, ' ');
-
- // If we have 'page' or 'paging' in our data, that's a good
- // sign. Add a bonus.
- if (!positiveMatch && PAGE_RE.test(parentData)) {
- positiveMatch = true;
- score += 25;
- }
-
- // If we have 'comment' or something in our data, and
- // we don't have something like 'content' as well, that's
- // a bad sign. Give a penalty.
- if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData) && EXTRANEOUS_LINK_HINTS_RE$1.test(parentData)) {
- if (!POSITIVE_SCORE_RE.test(parentData)) {
- negativeMatch = true;
- score -= 25;
- }
- }
-
- $parent = $parent.parent();
- });
-
- return score;
-}
-
-function scorePrevLink(linkData) {
- // If the link has something like "previous", its definitely
- // an old link, skip it.
- if (PREV_LINK_TEXT_RE$1.test(linkData)) {
- return -200;
- }
-
- return 0;
-}
-
-function shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls) {
- // skip if we've already fetched this url
- if (previousUrls.find(function (url) {
- return href === url;
- }) !== undefined) {
- return false;
- }
-
- // If we've already parsed this URL, or the URL matches the base
- // URL, or is empty, skip it.
- if (!href || href === articleUrl || href === baseUrl) {
- return false;
- }
-
- var hostname = parsedUrl.hostname;
-
- var _URL$parse = URL.parse(href);
-
- var linkHost = _URL$parse.hostname;
-
- // Domain mismatch.
-
- if (linkHost !== hostname) {
- return false;
- }
-
- // If href doesn't contain a digit after removing the base URL,
- // it's certainly not the next page.
- var fragment = href.replace(baseUrl, '');
- if (!DIGIT_RE$2.test(fragment)) {
- return false;
- }
-
- // This link has extraneous content (like "comment") in its link
- // text, so we skip it.
- if (EXTRANEOUS_LINK_HINTS_RE$1.test(linkText)) {
- return false;
- }
-
- // Next page link text is never long, skip if it is too long.
- if (linkText.length > 25) {
- return false;
- }
-
- return true;
-}
-
-function scoreBaseUrl(href, baseRegex) {
- // If the baseUrl isn't part of this URL, penalize this
- // link. It could still be the link, but the odds are lower.
- // Example:
- // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html
- if (!baseRegex.test(href)) {
- return -25;
- }
-
- return 0;
-}
-
-function scoreNextLinkText(linkData) {
- // Things like "next", ">>", etc.
- if (NEXT_LINK_TEXT_RE$1.test(linkData)) {
- return 50;
- }
-
- return 0;
-}
-
-function scoreCapLinks(linkData) {
- // Cap links are links like "last", etc.
- if (CAP_LINK_TEXT_RE$1.test(linkData)) {
- // If we found a link like "last", but we've already seen that
- // this link is also "next", it's fine. If it's not been
- // previously marked as "next", then it's probably bad.
- // Penalize.
- if (NEXT_LINK_TEXT_RE$1.test(linkData)) {
- return -65;
- }
- }
-
- return 0;
-}
-
-function makeBaseRegex(baseUrl) {
- return new RegExp('^' + baseUrl, 'i');
-}
-
-function makeSig($link, linkText) {
- return (linkText || $link.text()) + ' ' + ($link.attr('class') || '') + ' ' + ($link.attr('id') || '');
-}
-
-function scoreLinks(_ref) {
- var links = _ref.links;
- var articleUrl = _ref.articleUrl;
- var baseUrl = _ref.baseUrl;
- var parsedUrl = _ref.parsedUrl;
- var $ = _ref.$;
- var _ref$previousUrls = _ref.previousUrls;
- var previousUrls = _ref$previousUrls === undefined ? [] : _ref$previousUrls;
-
- parsedUrl = parsedUrl || URL.parse(articleUrl);
- var baseRegex = makeBaseRegex(baseUrl);
- var isWp = isWordpress($);
-
- // Loop through all links, looking for hints that they may be next-page
- // links. Things like having "page" in their textContent, className or
- // id, or being a child of a node with a page-y className or id.
- //
- // After we do that, assign each page a score, and pick the one that
- // looks most like the next page link, as long as its score is strong
- // enough to have decent confidence.
- var scoredPages = links.reduce(function (possiblePages, link) {
- // Remove any anchor data since we don't do a good job
- // standardizing URLs (it's hard), we're going to do
- // some checking with and without a trailing slash
- var href = removeAnchor(link.attribs.href);
- var $link = $(link);
- var linkText = $link.text();
-
- if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {
- return possiblePages;
- }
-
- // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##
- if (!possiblePages[href]) {
- possiblePages[href] = {
- score: 0,
- linkText: linkText,
- href: href
- };
- } else {
- possiblePages[href].linkText = possiblePages[href].linkText + '|' + linkText;
- }
-
- var possiblePage = possiblePages[href];
- var linkData = makeSig($link, linkText);
- var pageNum = pageNumFromUrl(href);
-
- var score = scoreBaseUrl(href, baseRegex);
- score += scoreNextLinkText(linkData);
- score += scoreCapLinks(linkData);
- score += scorePrevLink(linkData);
- score += scoreByParents$1($link);
- score += scoreExtraneousLinks(href);
- score += scorePageInLink(pageNum, isWp);
- score += scoreLinkText(linkText, pageNum);
- score += scoreSimilarity(score, articleUrl, href);
-
- possiblePage.score = score;
-
- return possiblePages;
- }, {});
-
- return Reflect.ownKeys(scoredPages).length === 0 ? null : scoredPages;
-}
-
-// Looks for and returns next page url
-// for multi-page articles
-var GenericNextPageUrlExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var url = _ref.url;
- var parsedUrl = _ref.parsedUrl;
- var _ref$previousUrls = _ref.previousUrls;
- var previousUrls = _ref$previousUrls === undefined ? [] : _ref$previousUrls;
-
- parsedUrl = parsedUrl || URL.parse(url);
-
- var articleUrl = removeAnchor(url);
- var baseUrl = articleBaseUrl(url, parsedUrl);
-
- var links = $('a[href]').toArray();
-
- var scoredLinks = scoreLinks({
- links: links,
- articleUrl: articleUrl,
- baseUrl: baseUrl,
- parsedUrl: parsedUrl,
- $: $,
- previousUrls: previousUrls
- });
-
- // If no links were scored, return null
- if (!scoredLinks) return null;
-
- // now that we've scored all possible pages,
- // find the biggest one.
- var topPage = Reflect.ownKeys(scoredLinks).reduce(function (acc, link) {
- var scoredLink = scoredLinks[link];
- return scoredLink.score > acc.score ? scoredLink : acc;
- }, { score: -100 });
-
- // If the score is less than 50, we're not confident enough to use it,
- // so we fail.
- if (topPage.score >= 50) {
- return topPage.href;
- }
-
- return null;
- }
-};
-
-var CANONICAL_META_SELECTORS = ['og:url'];
-
-function parseDomain(url) {
- var parsedUrl = URL.parse(url);
- var hostname = parsedUrl.hostname;
-
- return hostname;
-}
-
-function result(url) {
- return {
- url: url,
- domain: parseDomain(url)
- };
-}
-
-var GenericUrlExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var url = _ref.url;
- var metaCache = _ref.metaCache;
-
- var $canonical = $('link[rel=canonical]');
- if ($canonical.length !== 0) {
- var href = $canonical.attr('href');
- if (href) {
- return result(href);
- }
- }
-
- var metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);
- if (metaUrl) {
- return result(metaUrl);
- }
-
- return result(url);
- }
-};
-
-var EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];
-
-function clean$2(content, $) {
- var maxLength = arguments.length <= 2 || arguments[2] === undefined ? 200 : arguments[2];
-
- content = content.replace(/[\s\n]+/g, ' ').trim();
- return ellipsize(content, maxLength, { ellipse: '…' });
-}
-
-var GenericExcerptExtractor = {
- extract: function extract(_ref) {
- var $ = _ref.$;
- var content = _ref.content;
- var metaCache = _ref.metaCache;
-
- var excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);
- if (excerpt) {
- return clean$2(stripTags(excerpt, $));
- }
- // Fall back to excerpting from the extracted content
- var maxLength = 200;
- var shortContent = content.slice(0, maxLength * 5);
- return clean$2($(shortContent).text(), $, maxLength);
- }
-};
-
-var GenericWordCountExtractor = {
- extract: function extract(_ref) {
- var content = _ref.content;
-
- var $ = cheerio.load(content);
-
- var text = normalizeSpaces($('div').first().text());
- return text.split(/\s/).length;
- }
-};
-
-var GenericExtractor = {
- // This extractor is the default for all domains
- domain: '*',
- title: GenericTitleExtractor.extract,
- date_published: GenericDatePublishedExtractor.extract,
- author: GenericAuthorExtractor.extract,
- content: GenericContentExtractor.extract.bind(GenericContentExtractor),
- lead_image_url: GenericLeadImageUrlExtractor.extract,
- dek: GenericDekExtractor.extract,
- next_page_url: GenericNextPageUrlExtractor.extract,
- url_and_domain: GenericUrlExtractor.extract,
- excerpt: GenericExcerptExtractor.extract,
- word_count: GenericWordCountExtractor.extract,
- direction: function direction(_ref) {
- var title = _ref.title;
- return stringDirection.getDirection(title);
- },
-
- extract: function extract(options) {
- var html = options.html;
-
-
- if (html) {
- var $ = cheerio.load(html);
- options.$ = $;
- }
-
- var title = this.title(options);
- var date_published = this.date_published(options);
- var author = this.author(options);
- var content = this.content(_extends({}, options, { title: title }));
- var lead_image_url = this.lead_image_url(_extends({}, options, { content: content }));
- var dek = this.dek(_extends({}, options, { content: content }));
- var next_page_url = this.next_page_url(options);
- var excerpt = this.excerpt(_extends({}, options, { content: content }));
- var word_count = this.word_count(_extends({}, options, { content: content }));
- var direction = this.direction({ title: title });
-
- var _url_and_domain = this.url_and_domain(options);
-
- var url = _url_and_domain.url;
- var domain = _url_and_domain.domain;
-
-
- return {
- title: title,
- author: author,
- date_published: date_published || null,
- dek: dek,
- lead_image_url: lead_image_url,
- content: content,
- next_page_url: next_page_url,
- url: url,
- domain: domain,
- excerpt: excerpt,
- word_count: word_count,
- direction: direction
- };
- }
-};
-
-function getExtractor(url, parsedUrl) {
- parsedUrl = parsedUrl || URL.parse(url);
- var _parsedUrl = parsedUrl;
- var hostname = _parsedUrl.hostname;
-
- var baseDomain = hostname.split('.').slice(-2).join('.');
-
- return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;
-}
-
-var ATTR_RE = /\[([\w-]+)\]/;
-
-// Remove elements by an array of selectors
-function cleanBySelectors($content, $, _ref) {
- var clean = _ref.clean;
-
- if (!clean) return null;
-
- $(clean.join(','), $content).remove();
-
- return $content;
-}
-
-// Transform matching elements
-function transformElements($content, $, _ref2) {
- var transforms = _ref2.transforms;
-
- if (!transforms) return null;
-
- Reflect.ownKeys(transforms).forEach(function (key) {
- var $matches = $(key, $content);
- var value = transforms[key];
-
- // If value is a string, convert directly
- if (typeof value === 'string') {
- $matches.each(function (index, node) {
- convertNodeTo($(node), $, transforms[key]);
- });
- } else if (typeof value === 'function') {
- // If value is function, apply function to node
- $matches.each(function (index, node) {
- var result = value($(node), $);
- // If function returns a string, convert node to that value
- if (typeof result === 'string') {
- convertNodeTo($(node), $, result);
- }
- });
- }
- });
-
- return $content;
-}
-
-function select(opts) {
- var $ = opts.$;
- var type = opts.type;
- var extractionOpts = opts.extractionOpts;
- var _opts$extractHtml = opts.extractHtml;
- var extractHtml = _opts$extractHtml === undefined ? false : _opts$extractHtml;
- // Skip if there's not extraction for this type
-
- if (!extractionOpts) return null;
-
- // If a string is hardcoded for a type (e.g., Wikipedia
- // contributors), return the string
- if (typeof extractionOpts === 'string') return extractionOpts;
-
- var selectors = extractionOpts.selectors;
- var _extractionOpts$defau = extractionOpts.defaultCleaner;
- var defaultCleaner = _extractionOpts$defau === undefined ? true : _extractionOpts$defau;
-
-
- var matchingSelector = selectors.find(function (selector) {
- return $(selector).length === 1 && $(selector).text().trim() !== '';
- });
-
- if (!matchingSelector) return null;
-
- // Declaring result; will contain either
- // text or html, which will be cleaned
- // by the appropriate cleaner type
-
- // If the selector type requests html as its return type
- // transform and clean the element with provided selectors
- if (extractHtml) {
- var $content = $(matchingSelector);
-
- // Wrap in div so transformation can take place on root element
- $content.wrap($('<div></div>'));
- $content = $content.parent();
-
- $content = transformElements($content, $, extractionOpts);
- $content = cleanBySelectors($content, $, extractionOpts);
-
- if (defaultCleaner) {
- $content = Cleaners[type]($content, opts);
- }
-
- return $.html($content);
- }
- // if selector includes an attr (e.g., img[src]),
- // extract the attr
- var attr = matchingSelector.match(ATTR_RE);
- var result = void 0;
-
- if (attr) {
- result = $(matchingSelector).attr(attr[1]);
- } else {
- // otherwise use the text of the node
- result = $(matchingSelector).text();
- }
-
- // Allow custom extractor to skip default cleaner
- // for this type; defaults to true
- if (defaultCleaner) {
- return Cleaners[type](result, opts);
- }
-
- return result;
-}
-
-function extractResult(opts) {
- var type = opts.type;
- var extractor = opts.extractor;
-
- // If nothing matches the selector,
- // run the Generic extraction
-
- return select(_extends({}, opts, { extractionOpts: extractor[type] })) || GenericExtractor[type](opts);
-}
-
-var RootExtractor = {
- extract: function extract() {
- var extractor = arguments.length <= 0 || arguments[0] === undefined ? GenericExtractor : arguments[0];
- var opts = arguments[1];
- var _opts = opts;
- var contentOnly = _opts.contentOnly;
- var extractedTitle = _opts.extractedTitle;
- // This is the generic extractor. Run its extract method
-
- if (extractor.domain === '*') return extractor.extract(opts);
-
- opts = _extends({}, opts, {
- extractor: extractor
- });
-
- if (contentOnly) {
- var _content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: extractedTitle
- }));
- return {
- content: _content
- };
- }
- var title = extractResult(_extends({}, opts, { type: 'title' }));
- var date_published = extractResult(_extends({}, opts, { type: 'date_published' }));
- var author = extractResult(_extends({}, opts, { type: 'author' }));
- var next_page_url = extractResult(_extends({}, opts, { type: 'next_page_url' }));
- var content = extractResult(_extends({}, opts, { type: 'content', extractHtml: true, title: title
- }));
- var lead_image_url = extractResult(_extends({}, opts, { type: 'lead_image_url', content: content }));
- var dek = extractResult(_extends({}, opts, { type: 'dek', content: content }));
- var excerpt = extractResult(_extends({}, opts, { type: 'excerpt', content: content }));
- var word_count = extractResult(_extends({}, opts, { type: 'word_count', content: content }));
- var direction = extractResult(_extends({}, opts, { type: 'direction', title: title }));
-
- var _extractResult = extractResult(_extends({}, opts, { type: 'url_and_domain' }));
-
- var url = _extractResult.url;
- var domain = _extractResult.domain;
-
-
- return {
- title: title,
- content: content,
- author: author,
- date_published: date_published,
- lead_image_url: lead_image_url,
- dek: dek,
- next_page_url: next_page_url,
- url: url,
- domain: domain,
- excerpt: excerpt,
- word_count: word_count,
- direction: direction
- };
- }
-};
-
-var collectAllPages = (function () {
- var _ref = asyncToGenerator(regeneratorRuntime.mark(function _callee(_ref2) {
- var next_page_url = _ref2.next_page_url;
- var html = _ref2.html;
- var $ = _ref2.$;
- var metaCache = _ref2.metaCache;
- var result = _ref2.result;
- var Extractor = _ref2.Extractor;
- var title = _ref2.title;
- var url = _ref2.url;
- var pages, previousUrls, extractorOpts, nextPageResult;
- return regeneratorRuntime.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- // At this point, we've fetched just the first page
- pages = 1;
- previousUrls = [removeAnchor(url)];
-
- // If we've gone over 26 pages, something has
- // likely gone wrong.
-
- case 2:
- if (!(next_page_url && pages < 26)) {
- _context.next = 15;
- break;
- }
-
- pages += 1;
- _context.next = 6;
- return Resource.create(next_page_url);
-
- case 6:
- $ = _context.sent;
-
- html = $.html();
-
- extractorOpts = {
- url: next_page_url,
- html: html,
- $: $,
- metaCache: metaCache,
- contentOnly: true,
- extractedTitle: title,
- previousUrls: previousUrls
- };
- nextPageResult = RootExtractor.extract(Extractor, extractorOpts);
-
-
- previousUrls.push(next_page_url);
- result = _extends({}, result, {
- content: '\n ' + result.content + '\n <hr>\n <h4>Page ' + pages + '</h4>\n ' + nextPageResult.content + '\n '
- });
-
- next_page_url = nextPageResult.next_page_url;
- _context.next = 2;
- break;
-
- case 15:
- return _context.abrupt('return', _extends({}, result, {
- total_pages: pages,
- pages_rendered: pages
- }));
-
- case 16:
- case 'end':
- return _context.stop();
- }
- }
- }, _callee, this);
- }));
-
- function collectAllPages(_x) {
- return _ref.apply(this, arguments);
- }
-
- return collectAllPages;
-})();
-
-var Iris = {
- parse: function parse(url, html) {
- var _this = this;
-
- var opts = arguments.length <= 2 || arguments[2] === undefined ? {} : arguments[2];
- return asyncToGenerator(regeneratorRuntime.mark(function _callee() {
- var _ref, _ref$fetchAllPages, fetchAllPages, parsedUrl, Extractor, $, metaCache, result, _result, title, next_page_url;
-
- return regeneratorRuntime.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- _ref = opts || true;
- _ref$fetchAllPages = _ref.fetchAllPages;
- fetchAllPages = _ref$fetchAllPages === undefined ? true : _ref$fetchAllPages;
- parsedUrl = URL.parse(url);
-
- if (validateUrl(parsedUrl)) {
- _context.next = 6;
- break;
- }
-
- return _context.abrupt('return', Errors.badUrl);
-
- case 6:
- Extractor = getExtractor(url, parsedUrl);
- // console.log(`Using extractor for ${Extractor.domain}`);
-
- _context.next = 9;
- return Resource.create(url, html, parsedUrl);
-
- case 9:
- $ = _context.sent;
-
- if (!$.error) {
- _context.next = 12;
- break;
- }
-
- return _context.abrupt('return', $);
-
- case 12:
-
- html = $.html();
-
- // Cached value of every meta name in our document.
- // Used when extracting title/author/date_published/dek
- metaCache = $('meta').map(function (_, node) {
- return $(node).attr('name');
- }).toArray();
- result = RootExtractor.extract(Extractor, { url: url, html: html, $: $, metaCache: metaCache, parsedUrl: parsedUrl });
- _result = result;
- title = _result.title;
- next_page_url = _result.next_page_url;
-
- // Fetch more pages if next_page_url found
-
- if (!(fetchAllPages && next_page_url)) {
- _context.next = 24;
- break;
- }
-
- _context.next = 21;
- return collectAllPages({
- Extractor: Extractor,
- next_page_url: next_page_url,
- html: html,
- $: $,
- metaCache: metaCache,
- result: result,
- title: title,
- url: url
- });
-
- case 21:
- result = _context.sent;
- _context.next = 25;
- break;
-
- case 24:
- result = _extends({}, result, {
- total_pages: 1,
- rendered_pages: 1
- });
-
- case 25:
- return _context.abrupt('return', result);
-
- case 26:
- case 'end':
- return _context.stop();
- }
- }
- }, _callee, _this);
- }))();
- }
-};
-
-module.exports = Iris;
-//# sourceMappingURL=iris.js.map
diff --git a/dist/iris.js.map b/dist/iris.js.map
deleted file mode 100644
index 32a7c816..00000000
--- a/dist/iris.js.map
+++ /dev/null
@@ -1 +0,0 @@
-{"version":3,"file":null,"sources":["../src/utils/range.js","../src/utils/validate-url.js","../src/utils/errors.js","../src/resource/utils/constants.js","../src/resource/utils/fetch-resource.js","../src/resource/utils/dom/normalize-meta-tags.js","../src/resource/utils/dom/constants.js","../src/resource/utils/dom/convert-lazy-loaded-images.js","../src/resource/utils/dom/clean.js","../src/resource/index.js","../src/extractors/custom/nymag.com/index.js","../src/extractors/custom/blogspot.com/index.js","../src/extractors/custom/wikipedia.org/index.js","../src/extractors/custom/twitter.com/index.js","../src/extractors/all.js","../src/utils/dom/constants.js","../src/utils/dom/strip-unlikely-candidates.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-h-ones.js","../src/utils/dom/clean-attributes.js","../src/utils/dom/remove-empty.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/page-num-from-url.js","../src/utils/text/remove-anchor.js","../src/utils/text/article-base-url.js","../src/utils/
text/has-sentence-end.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/find-top-candidate.js","../src/utils/dom/clean-tags.js","../src/utils/dom/clean-headers.js","../src/utils/dom/rewrite-top-level.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-meta.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/is-wordpress.js","../src/cleaners/constants.js","../src/cleaners/author.js","../src/cleaners/lead-image-url.js","../src/cleaners/dek.js","../src/cleaners/date-published.js","../src/cleaners/content.js","../src/cleaners/title.js","../src/cleaners/resolve-split-title.js","../src/cleaners/index.js","../src/extractors/generic/content/extract-best-node.js","../src/extractors/generic/content/extractor.js","../src/extractors/generic/title/constants.js","../src/extractors/generic/title/extractor.js","../src/extractors/generic/author/constants.js","../src/extractors/generic/author/extractor.js","../src/extractors/generic/date-published/constants.js","../src/extractors/generic/date-published/extractor.js","../src/extractors/generic/dek/extractor.js","../src/extractors/generic/lead-image-url/constants.js","../src/extractors/generic/lead-image-url/score-image.js","../src/extractors/generic/lead-image-url/extractor.js","../src/extractors/generic/next-page-url/scoring/utils/score-similarity.js","../src/extractors/generic/next-page-url/scoring/utils/score-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-page-in-link.js","../src/extractors/generic/next-page-url/scoring/constants.js","../src/extractors/generic/next-page-url/scoring/utils/score-extraneous-links.js","../src/extractors/generic/next-page-url/scoring/utils/score-by-parents.js","../src/extractors/generic/next-page-url/scoring/utils/score-prev-link.js","
../src/extractors/generic/next-page-url/scoring/utils/should-score.js","../src/extractors/generic/next-page-url/scoring/utils/score-base-url.js","../src/extractors/generic/next-page-url/scoring/utils/score-next-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-cap-links.js","../src/extractors/generic/next-page-url/scoring/score-links.js","../src/extractors/generic/next-page-url/extractor.js","../src/extractors/generic/url/constants.js","../src/extractors/generic/url/extractor.js","../src/extractors/generic/excerpt/constants.js","../src/extractors/generic/excerpt/extractor.js","../src/extractors/generic/word-count/extractor.js","../src/extractors/generic/index.js","../src/extractors/get-extractor.js","../src/extractors/constants.js","../src/extractors/root-extractor.js","../src/extractors/collect-all-pages.js","../src/iris.js"],"sourcesContent":["export default function* range(start = 1, end = 1) {\n while (start <= end) {\n yield start += 1;\n }\n}\n","// extremely simple url validation as a first step\nexport default function validateUrl({ hostname }) {\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n","const Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.',\n },\n};\n\nexport default Errors;\n","export const REQUEST_HEADERS = {\n 'User-Agent': 'Readability - http://readability.com/about/',\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nexport const FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nconst BAD_CONTENT_TYPES = [\n 'audio/mpeg',\n 'image/gif',\n 'image/jpeg',\n 'image/jpg',\n];\n\nexport const BAD_CONTENT_TYPES_RE = new RegExp(`^(${BAD_CONTENT_TYPES.join('|')})$`, 'i');\n\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. 
Defaults to 5 MB.\nexport const MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\nexport const PROXY_DOMAINS = false;\nexport const REQUESTS_PROXIES = {\n http: 'http://38.98.105.139:33333',\n https: 'http://38.98.105.139:33333',\n};\n\nexport const DOMAINS_TO_PROXY = [\n 'nih.gov',\n 'gutenberg.org',\n];\n","import 'babel-polyfill';\n\nimport URL from 'url';\nimport request from 'request';\nimport { Errors } from 'utils';\n\nimport {\n REQUEST_HEADERS,\n FETCH_TIMEOUT,\n BAD_CONTENT_TYPES_RE,\n MAX_CONTENT_LENGTH,\n} from './constants';\n\nfunction get(options) {\n return new Promise((resolve, reject) => {\n request(options, (err, response, body) => {\n if (err) {\n reject(err);\n } else {\n resolve({ body, response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nexport function validateResponse(response, parseNon2xx = false) {\n // Check if we got a valid status code\n if (response.statusMessage !== 'OK') {\n if (!response.statusCode) {\n throw new Error(\n `Unable to fetch content. 
Original exception was ${response.error}`\n );\n } else if (!parseNon2xx) {\n throw new Error(\n `Resource returned a response status code of ${response.statusCode} and resource was instructed to reject non-2xx level status codes.`\n );\n }\n }\n\n const {\n 'content-type': contentType,\n 'content-length': contentLength,\n } = response.headers;\n\n // Check that the content is not in BAD_CONTENT_TYPES\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error(\n `Content-type for this resource was ${contentType} and is not allowed.`\n );\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error(\n `Content for this resource was too large. Maximum content length is ${MAX_CONTENT_LENGTH}.`\n );\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\nexport function baseDomain({ host }) {\n return host.split('.').slice(-2).join('.');\n}\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nexport default async function fetchResource(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n\n const options = {\n url: parsedUrl,\n headers: { ...REQUEST_HEADERS },\n timeout: FETCH_TIMEOUT,\n // Don't set encoding; fixes issues\n // w/gzipped responses\n encoding: null,\n // Accept cookies\n jar: true,\n // Accept and decode gzip\n gzip: true,\n // Follow any redirect\n followAllRedirects: true,\n };\n\n const { response, body } = await get(options);\n\n try {\n validateResponse(response);\n return { body, response };\n } catch (e) {\n return Errors.badUrl;\n }\n}\n","function convertMetaProp($, from, to) {\n $(`meta[${from}]`).each((_, node) => {\n const $node = $(node);\n\n const value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. See, e.g., og or twitter meta tags.\n\nexport default function normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n","export const IS_LINK = new RegExp('https?://', 'i');\nexport const IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nexport const TAGS_TO_REMOVE = [\n 'script',\n 'style',\n 'form',\n].join(',');\n","import 'babel-polyfill';\n\nimport {\n IS_LINK,\n IS_IMAGE,\n} from './constants';\n\n// Convert all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. 
We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nexport default function convertLazyLoadedImages($) {\n $('img').each((_, img) => {\n Reflect.ownKeys(img.attribs).forEach((attr) => {\n const value = img.attribs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) &&\n IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n","import { TAGS_TO_REMOVE } from './constants';\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root().find('*')\n .contents()\n .filter(isComment)\n .remove();\n\n return $;\n}\n\nexport default function clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n","import 'babel-polyfill';\n\nimport cheerio from 'cheerio';\n\nimport { fetchResource } from './utils';\nimport {\n normalizeMetaTags,\n convertLazyLoadedImages,\n clean,\n} from './utils/dom';\n\nconst Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. 
Expects a\n // string.\n async create(url, preparedResponse, parsedUrl) {\n let result;\n\n if (preparedResponse) {\n const validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500,\n },\n };\n\n result = { body: preparedResponse, response: validResponse };\n } else {\n result = await fetchResource(url, parsedUrl);\n }\n\n if (result.error) {\n return result;\n }\n\n return this.generateDoc(result);\n },\n\n generateDoc({ body: content, response }) {\n const { 'content-type': contentType } = response.headers;\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n if (!contentType.includes('html') &&\n !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n let $ = cheerio.load(content, { normalizeWhitespace: true });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n },\n};\n\nexport default Resource;\n","const NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. 
Extractor will stop on first occurence\n selectors: [\n 'div.article-content',\n 'section.body',\n 'article.article',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n '.ad',\n '.single-related-story',\n ],\n\n // Object of tranformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: ($node) => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n },\n\n title: {\n selectors: [\n 'h1.lede-feature-title',\n 'h1.headline-primary',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n '.by-authors',\n '.lede-feature-author',\n ],\n },\n\n dek: {\n selectors: [\n '.lede-feature-teaser',\n ],\n },\n\n date_published: {\n selectors: [\n 'time.article-timestamp[datetime]',\n 'time.article-timestamp',\n ],\n },\n};\n\nexport default NYMagExtractor;\n","const BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: [\n '.post-content noscript',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n ],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div',\n },\n },\n\n author: {\n selectors: [\n '.post-author-name',\n ],\n },\n\n title: {\n selectors: [\n 'h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n 'span.publishdate',\n ],\n },\n};\n\nexport default BloggerExtractor;\n","const WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: [\n '#mw-content-text',\n ],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with 
caption\n transforms: {\n '.infobox img': ($node) => {\n const $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure',\n },\n\n // Selectors to remove from the extracted content\n clean: [\n '.mw-editsection',\n 'figure tr, figure td, figure tbody',\n '#toc',\n ],\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: [\n 'h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n '#footer-info-lastmod',\n ],\n },\n\n};\n\nexport default WikipediaExtractor;\n","const TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': ($node, $) => {\n const tweets = $node.find('.tweet');\n const $tweetContainer = $('');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with s, which\n // renders as a strikethrough\n s: 'span',\n },\n\n selectors: [\n '.permalink[role=main]',\n ],\n\n defaultCleaner: false,\n\n clean: [\n '.stream-item-footer',\n 'button',\n '.tweet-details-fixer',\n ],\n },\n\n author: {\n selectors: [\n '.tweet.permalink-tweet .username',\n ],\n },\n\n date_published: {\n selectors: [\n '.permalink-tweet ._timestamp[data-time-ms]',\n // '.tweet.permalink-tweet .metadata',\n ],\n },\n\n};\n\n\nexport default TwitterExtractor;\n","import NYMagExtractor from './custom/nymag.com';\nimport BloggerExtractor from './custom/blogspot.com';\nimport WikipediaExtractor from './custom/wikipedia.org';\nimport TwitterExtractor from './custom/twitter.com';\n\nconst Extractors = {\n 'nymag.com': NYMagExtractor,\n 'blogspot.com': BloggerExtractor,\n 'wikipedia.org': 
WikipediaExtractor,\n 'twitter.com': TwitterExtractor,\n};\n\nexport default Extractors;\n","// Spacer images to be removed\nexport const SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = ['src', 'href', 'class', 'id', 'score'];\nexport const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(tag => `${tag}:empty`).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. 
Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. 
Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n CANDIDATES_WHITELIST,\n CANDIDATES_BLACKLIST,\n} from './constants';\n\nexport default function stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but 
may be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each((index, node) => {\n const $node = $(node);\n const classes = $node.attr('class');\n const id = $node.attr('id');\n if (!id && !classes) return;\n\n const classAndId = `${classes || ''} ${id || ''}`;\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. - AP\n\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const nextElement = $(element).next().get(0);\n\n if (nextElement && nextElement.tagName === 'br') {\n collapsing = true;\n $(element).remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. 
does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. 
Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","import 'babel-polyfill';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const { attribs } = $node.get(0);\n const attribString = Reflect.ownKeys(attribs)\n .map(key => `${key}=${attribs[key]}`)\n .join(' ');\n\n $node.replaceWith(`<${tag} ${attribString}>${$node.contents()}${tag}>`);\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n} from './constants';\n\nexport default function stripJunkTags(article, $) {\n $(STRIP_OUTPUT_TAGS.join(','), article).remove();\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there's less than 3 of them (<3),\n// strip them. 
Otherwise, turn 'em into H2s.\nexport default function cleanHOnes(article, $) {\n const $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each((index, node) => $(node).remove());\n } else {\n $hOnes.each((index, node) => {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n","import 'babel-polyfill';\n\nimport { WHITELIST_ATTRS_RE } from './constants';\n\nfunction removeAllButWhitelist($article) {\n // $('*', article).each((index, node) => {\n $article.find('*').each((index, node) => {\n node.attribs = Reflect.ownKeys(node.attribs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: node.attribs[attr] };\n }\n\n return acc;\n }, {});\n });\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article) {\n removeAllButWhitelist($article);\n\n return $article;\n}\n","export default function removeEmpty($article, $) {\n $article.find('p').each((index, p) => {\n const $p = $(p);\n if ($p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. 
Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if 
(READABILITY_ASSET.test(classes)) {\n score += 25;\n }\n }\n\n return score;\n}\n\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n\n","\nexport default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","const NORMALIZE_RE = /\\s{2,}/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n","import { PAGE_IN_HREF_RE } from './constants';\n\nexport default function pageNumFromUrl(url) {\n const matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n const pageNum = 
parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? pageNum : null;\n}\n","export default function removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n","import URL from 'url';\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n let goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. 
Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path.split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. 
If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling === $candidate) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n return wrappingDiv;\n}\n","import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\nimport mergeSiblings from './merge-siblings';\n\n// After we've calculated scores, loop through all of the 
possible\n// candidate nodes we found and find the one with the highest score.\nexport default function findTopCandidate($) {\n let $candidate;\n let topScore = 0;\n\n $('[score]').each((index, node) => {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {\n return;\n }\n\n const $node = $(node);\n const score = getScore($node);\n\n if (score > topScore) {\n topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. 
We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName;\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n\n","import { getWeight } from 'extractors/generic/content/scoring';\n\nimport { HEADER_TAG_LIST } from './constants';\nimport { normalizeSpaces } from '../text';\n\nexport default function cleanHeaders($article, $, title = '') {\n $(HEADER_TAG_LIST, $article).each((index, header) => {\n const $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\nexport default function rewriteTopLevel(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo($('html'), $, 'div');\n $ = convertNodeTo($('body'), $, 'div');\n\n return $;\n}\n","import URL from 
'url';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const url = node.attribs[attr];\n const absoluteUrl = URL.resolve(rootUrl, url);\n\n node.attribs[attr] = absoluteUrl;\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","\nexport function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { stripTags } from 'utils/dom';\n\n// Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\nexport default function extractFromMeta(\n $,\n metaNames,\n cachedNames,\n cleanTags = true\n) {\n const foundNames = metaNames.filter(name => cachedNames.indexOf(name) !== -1);\n\n for (const name of foundNames) {\n const type = 'name';\n const value = 'value';\n\n const nodes = $(`meta[${type}=\"${name}\"]`);\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n const values =\n nodes.map((index, node) => $(node).attr(value))\n .toArray()\n .filter(text => text !== '');\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. 
Skip this name\n // also.\n if (values.length === 1) {\n let metaValue;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags) {\n metaValue = stripTags(values[0], $);\n } else {\n metaValue = values[0];\n }\n\n return metaValue;\n }\n }\n\n // If nothing is found, return null\n return null;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`${text}`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","export default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const classAndId = `${parent.attribs.class} ${parent.attribs.id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","import { IS_WP_SELECTOR } from './constants';\n\nexport default function isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n","// CLEAN AUTHOR CONSTANTS\nexport const CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n // author = re.sub(r'^\\s*(posted |written )?by\\s*:?\\s*(.*)(?i)',\n\n// CLEAN DEK CONSTANTS\nexport const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistenty enough. Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\nexport const DEK_META_TAGS = [\n];\n\n// An ordered list of Selectors to find likely article deks. 
From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\nexport const DEK_SELECTORS = [\n '.entry-summary',\n];\n\n// CLEAN DATE PUBLISHED CONSTANTS\nexport const MS_DATE_STRING = /^\\d{13}$/i;\nexport const SEC_DATE_STRING = /^\\d{10}$/i;\nexport const CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nexport const TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nexport const TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nconst months = [\n 'jan',\n 'feb',\n 'mar',\n 'apr',\n 'may',\n 'jun',\n 'jul',\n 'aug',\n 'sep',\n 'oct',\n 'nov',\n 'dec',\n];\nconst allMonths = months.join('|');\nconst timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nconst timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nexport const SPLIT_DATE_STRING =\n new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nexport const TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nexport const DOMAIN_ENDINGS_RE =\n new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n","import { CLEAN_AUTHOR_RE } from './constants';\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nexport default function cleanAuthor(author) {\n return author.replace(CLEAN_AUTHOR_RE, '$2').trim();\n}\n","import validUrl from 'valid-url';\n\nexport default function clean(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n","import { stripTags } from 'utils/dom';\n\nimport { TEXT_LINK_RE } from './constants';\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nexport default function cleanDek(dek, { $ }) {\n // Sanity check that we didn't 
get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n const dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return dekText.trim();\n}\n","import moment from 'moment';\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nimport {\n MS_DATE_STRING,\n SEC_DATE_STRING,\n CLEAN_DATE_STRING_RE,\n SPLIT_DATE_STRING,\n TIME_MERIDIAN_SPACE_RE,\n TIME_MERIDIAN_DOTS_RE,\n} from './constants';\n\nexport function cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || [])\n .join(' ')\n .replace(TIME_MERIDIAN_DOTS_RE, 'm')\n .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')\n .replace(CLEAN_DATE_STRING_RE, '$1')\n .trim();\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nexport default function cleanDatePublished(dateString) {\n // If string is in milliseconds or seconds, convert to int\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n dateString = parseInt(dateString, 10);\n }\n\n let date = moment(new Date(dateString));\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = moment(new Date(dateString));\n }\n\n return date.isValid() ? 
date.toISOString() : null;\n}\n","import {\n cleanAttributes,\n cleanHeaders,\n cleanHOnes,\n cleanImages,\n cleanTags,\n removeEmpty,\n rewriteTopLevel,\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom';\n\n// Clean our article content, returning a new, cleaned node.\nexport default function extractCleanNode(\n article,\n {\n $,\n cleanConditionally = true,\n title = '',\n url = '',\n }\n) {\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel(article, $);\n\n // Drop small images and spacer images\n cleanImages(article, $);\n\n // Drop certain tags like , etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. Otherwise, turn 'em into H2s.\n cleanHOnes(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // Make links absolute\n makeLinksAbsolute(article, $, url);\n\n // Remove unnecessary attributes\n cleanAttributes(article);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n cleanTags(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n return article;\n}\n","import { stripTags } from 'utils/dom';\n\nimport { TITLE_SPLITTERS_RE } from './constants';\nimport { resolveSplitTitle } from './index';\n\nexport default function cleanTitle(title, { url, $ }) {\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n const h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return stripTags(title, $).trim();\n}\n\n","import URL from 'url';\nimport 'babel-polyfill';\nimport wuzzy from 'wuzzy';\n\nimport {\n TITLE_SPLITTERS_RE,\n DOMAIN_ENDINGS_RE,\n} from './constants';\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n const termCounts = splitTitle.reduce((acc, titleText) => {\n acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n const [maxTerm, termCount] =\n Reflect.ownKeys(termCounts)\n .reduce((acc, key) => {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]);\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. 
Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n const splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n const longestEnd = splitEnds.reduce((acc, end) => acc.length > end.length ? acc : end, '');\n\n if (longestEnd.length > 10) {\n return longestEnd;\n }\n\n return text;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. Not the end of the world if it doesn't strip right.\n const { host } = URL.parse(url);\n const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n const startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nexport default function resolveSplitTitle(title, url = '') {\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n const splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n let newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // 
Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n","import cleanAuthor from './author';\nimport cleanImage from './lead-image-url';\nimport cleanDek from './dek';\nimport cleanDatePublished from './date-published';\nimport cleanContent from './content';\nimport cleanTitle from './title';\n\nconst Cleaners = {\n author: cleanAuthor,\n lead_image_url: cleanImage,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: cleanContent,\n title: cleanTitle,\n};\n\n\nexport default Cleaners;\n\nexport { cleanAuthor };\nexport { cleanImage };\nexport { cleanDek };\nexport { cleanDatePublished };\nexport { cleanContent };\nexport { cleanTitle };\nexport { default as resolveSplitTitle } from './resolve-split-title';\n","import {\n stripUnlikelyCandidates,\n convertToParagraphs,\n} from 'utils/dom';\n\nimport {\n scoreContent,\n findTopCandidate,\n} from './scoring';\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nexport default function extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? 
– AP\n // let $root = $.root().clone()\n\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs($);\n $ = scoreContent($, opts.weightNodes);\n const $topCandidate = findTopCandidate($);\n\n return $topCandidate;\n}\n","import cheerio from 'cheerio';\nimport 'babel-polyfill';\n\nimport { nodeIsSufficient } from 'utils/dom';\nimport { cleanContent } from 'cleaners';\nimport { normalizeSpaces } from 'utils/text';\n\nimport extractBestNode from './extract-best-node';\n\nconst GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true,\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. 
Things like forms, ads, etc.\n extract({ $, html, title, url }, opts) {\n opts = { ...this.defaultOpts, ...opts };\n\n $ = $ || cheerio.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n let node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {\n opts[key] = false;\n $ = cheerio.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n\n return this.cleanAndReturnNode(node, $);\n },\n\n // Get node given current options\n getContentNode($, title, url, opts) {\n return cleanContent(\n extractBestNode($, opts),\n {\n $,\n cleanConditionally: opts.cleanConditionally,\n title,\n url,\n });\n },\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n\n // if return_type == \"html\":\n // return normalize_spaces(node_to_html(node))\n // else:\n // return node\n },\n\n};\n\nexport default GenericContentExtractor;\n","// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. 
From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n 'tweetmeme-title',\n 'dc.title',\n 'rbtitle',\n 'headline',\n 'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nexport const WEAK_TITLE_META_TAGS = [\n 'og:title',\n];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. The\n// speed tradeoff is worth the accuracy hit.\nexport const STRONG_TITLE_SELECTORS = [\n '.hentry .entry-title',\n 'h1#articleHeader',\n 'h1.articleHeader',\n 'h1.article',\n '.instapaper_title',\n '#meebo-title',\n];\n\nexport const WEAK_TITLE_SELECTORS = [\n 'article h1',\n '#entry-title',\n '.entry-title',\n '#entryTitle',\n '#entrytitle',\n '.entryTitle',\n '.entrytitle',\n '#articleTitle',\n '.articleTitle',\n 'post post-title',\n 'h1.title',\n 'h2.article',\n 'h1',\n 'html head title',\n 'title',\n];\n","import { cleanTitle } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n STRONG_TITLE_META_TAGS,\n WEAK_TITLE_META_TAGS,\n STRONG_TITLE_SELECTORS,\n WEAK_TITLE_SELECTORS,\n} from './constants';\n\nconst GenericTitleExtractor = {\n extract({ $, url, metaCache }) {\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n let title;\n\n title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors($, 
STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Last, look for weaker selector tags that may match.\n title = extractFromSelectors($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // If no matches, return an empty string\n return '';\n },\n};\n\nexport default GenericTitleExtractor;\n","// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nexport const AUTHOR_META_TAGS = [\n 'byl',\n 'clmst',\n 'dc.author',\n 'dcsext.author',\n 'dc.creator',\n 'rbauthors',\n 'authors',\n];\n\nexport const AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of XPath Selectors to find likely article authors. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nexport const AUTHOR_SELECTORS = [\n '.entry .entry-author',\n '.author.vcard .fn',\n '.author .vcard .fn',\n '.byline.vcard .fn',\n '.byline .vcard .fn',\n '.byline .by .author',\n '.byline .by',\n '.byline .author',\n '.post-author.vcard',\n '.post-author .vcard',\n 'a[rel=author]',\n '#by_author',\n '.by_author',\n '#entryAuthor',\n '.entryAuthor',\n '.byline a[href*=author]',\n '#author .authorname',\n '.author .authorname',\n '#author',\n '.author',\n '.articleauthor',\n '.ArticleAuthor',\n '.byline',\n];\n\n// An ordered list of Selectors to find likely article authors, with\n// regular expression for content.\nconst bylineRe = /^[\\n\\s]*By/i;\nexport const BYLINE_SELECTORS_RE = [\n ['#byline', bylineRe],\n ['.byline', bylineRe],\n];\n","import { cleanAuthor } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n AUTHOR_META_TAGS,\n AUTHOR_MAX_LENGTH,\n AUTHOR_SELECTORS,\n BYLINE_SELECTORS_RE,\n} from './constants';\n\nconst GenericAuthorExtractor = {\n extract({ $, metaCache }) {\n let author;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors looking for potential authors.\n author = extractFromSelectors($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression based selectors for\n // potential authors.\n for (const [selector, regex] of BYLINE_SELECTORS_RE) {\n const node = $(selector);\n if (node.length === 1) {\n const text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n\n return null;\n },\n};\n\nexport default GenericAuthorExtractor;\n\n","// An ordered list of meta tag names that denote\n// 
likely date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const DATE_PUBLISHED_META_TAGS = [\n 'article:published_time',\n 'displaydate',\n 'dc.date',\n 'dc.date.issued',\n 'rbpubdate',\n 'publish_date',\n 'pub_date',\n 'pagedate',\n 'pubdate',\n 'revision_date',\n 'doc_date',\n 'date_created',\n 'content_create_date',\n 'lastmodified',\n 'created',\n 'date',\n];\n\n// An ordered list of XPath Selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nexport const DATE_PUBLISHED_SELECTORS = [\n '.hentry .dtstamp.published',\n '.hentry .published',\n '.hentry .dtstamp.updated',\n '.hentry .updated',\n '.single .published',\n '.meta .published',\n '.meta .postDate',\n '.entry-date',\n '.byline .date',\n '.postmetadata .date',\n '.article_datetime',\n '.date-header',\n '.story-date',\n '.dateStamp',\n '#story .datetime',\n '.dateline',\n '.pubdate',\n];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. 
These should always have the first\n// reference be a date string that is parseable by dateutil.parser.parse\nconst abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nexport const DATE_PUBLISHED_URL_RES = [\n // /2012/01/27/ but not /2012/01/293\n new RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n // 20120127 or 20120127T but not 2012012733 or 8201201733\n // /[^0-9](20\\d{2}[01]\\d[0-3]\\d)([^0-9]|$)/i,\n // 2012-01-27\n new RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n // /2012/jan/27/\n new RegExp(`/(20\\\\d{2}/${abbrevMonthsStr}/[0-3]\\\\d)/`, 'i'),\n];\n\n","import { cleanDatePublished } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\nimport { extractFromUrl } from 'utils/text';\n\nimport {\n DATE_PUBLISHED_META_TAGS,\n DATE_PUBLISHED_SELECTORS,\n DATE_PUBLISHED_URL_RES,\n} from './constants';\n\nconst GenericDatePublishedExtractor = {\n extract({ $, url, metaCache }) {\n let datePublished;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors looking for potential\n // date_published's.\n datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a dately string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n },\n};\n\nexport default GenericDatePublishedExtractor;\n","// import {\n// DEK_META_TAGS,\n// DEK_SELECTORS,\n// DEK_URL_RES,\n// } from './constants';\n\n// import { cleanDek } from 'cleaners';\n\n// import {\n// extractFromMeta,\n// extractFromSelectors,\n// } from 'utils/dom';\n\n// Currently there is 
only one selector for\n// deks. We should simply return null here\n// until we have a more robust generic option.\n// Below is the original source for this, for reference.\nconst GenericDekExtractor = {\n // extract({ $, content, metaCache }) {\n extract() {\n return null;\n },\n};\n\nexport default GenericDekExtractor;\n\n// def extract_dek(self):\n// # First, check to see if we have a matching meta tag that we can make\n// # use of.\n// dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)\n// if not dek:\n// # Second, look through our CSS/XPath selectors. This may return\n// # an HTML fragment.\n// dek = self.extract_from_selectors('dek',\n// constants.DEK_SELECTORS,\n// text_only=False)\n//\n// if dek:\n// # Make sure our dek isn't in the first few thousand characters\n// # of the content, otherwise it's just the start of the article\n// # and not a true dek.\n// content = self.extract_content()\n// content_chunk = normalize_spaces(strip_tags(content[:2000]))\n// dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.\n//\n// # 80% or greater similarity means the dek was very similar to some\n// # of the starting content, so we skip it.\n// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:\n// return dek\n//\n// return None\n","// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const LEAD_IMAGE_URL_META_TAGS = [\n 'og:image',\n 'twitter:image',\n 'image_src',\n];\n\nexport const LEAD_IMAGE_URL_SELECTORS = [\n 'link[rel=image_src]',\n];\n\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS = [\n 'upload',\n 'wp-content',\n 'large',\n 'photo',\n 'wp-image',\n];\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS = [\n 'spacer',\n 'sprite',\n 'blank',\n 'throbber',\n 'gradient',\n 'tile',\n 'bg',\n 
'background',\n 'icon',\n 'social',\n 'header',\n 'hdr',\n 'advert',\n 'spinner',\n 'loader',\n 'loading',\n 'default',\n 'rating',\n 'share',\n 'facebook',\n 'twitter',\n 'theme',\n 'promo',\n 'ads',\n 'wp-includes',\n];\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const GIF_RE = /\\.gif(\\?.*)?$/i;\nexport const JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n","import {\n POSITIVE_LEAD_IMAGE_URL_HINTS_RE,\n NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,\n GIF_RE,\n JPG_RE,\n} from './constants';\n\nimport { PHOTO_HINTS_RE } from '../content/scoring/constants';\n\nfunction getSig($node) {\n return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;\n}\n\n// Scores image urls based on a variety of heuristics.\nexport function scoreImageUrl(url) {\n url = url.trim();\n let score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nexport function scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nexport function scoreByParents($img) {\n let score = 0;\n const $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n const $parent = $img.parent();\n let $gParent;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach(($node) => {\n if (PHOTO_HINTS_RE.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// 
caption. Bonus if so.\nexport function scoreBySibling($img) {\n let score = 0;\n const $sibling = $img.next();\n const sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nexport function scoreByDimensions($img) {\n let score = 0;\n\n const width = parseFloat($img.attr('width'));\n const height = parseFloat($img.attr('height'));\n const src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n const area = width * height;\n if (area < 5000) { // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nexport function scoreByPosition($imgs, index) {\n return ($imgs.length / 2) - index;\n}\n","import 'babel-polyfill';\n\nimport { extractFromMeta } from 'utils/dom';\nimport { cleanImage } from 'cleaners';\n\nimport {\n LEAD_IMAGE_URL_META_TAGS,\n LEAD_IMAGE_URL_SELECTORS,\n} from './constants';\n\nimport {\n scoreImageUrl,\n scoreAttr,\n scoreByParents,\n scoreBySibling,\n scoreByDimensions,\n scoreByPosition,\n} from './score-image';\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nconst GenericLeadImageUrlExtractor = {\n extract({ $, content, metaCache }) {\n let cleanUrl;\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n const imageUrl =\n extractFromMeta(\n $,\n LEAD_IMAGE_URL_META_TAGS,\n metaCache,\n false\n );\n\n if (imageUrl) {\n cleanUrl = cleanImage(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n const imgs = $('img', content).toArray();\n const imgScores = {};\n\n imgs.forEach((img, index) => {\n const $img = $(img);\n const src = $img.attr('src');\n\n if (!src) return;\n\n let score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n const [topUrl, topScore] =\n Reflect.ownKeys(imgScores).reduce((acc, key) =>\n imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc\n , [null, 0]);\n\n if (topScore > 0) {\n cleanUrl = cleanImage(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n for (const selector of LEAD_IMAGE_URL_SELECTORS) {\n const $node = $(selector).first();\n const src = $node.attr('src');\n if (src) {\n cleanUrl = cleanImage(src);\n if (cleanUrl) return cleanUrl;\n }\n\n const href = $node.attr('href');\n if (href) {\n cleanUrl = cleanImage(href);\n if (cleanUrl) return cleanUrl;\n }\n\n const value = $node.attr('value');\n if (value) {\n cleanUrl = cleanImage(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n\n return null;\n },\n};\n\nexport default GenericLeadImageUrlExtractor;\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. 
Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. 
Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n","import difflib from 'difflib';\n\nexport default function scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. 
On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n const diffPercent = 1.0 - similarity;\n const diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n","import { IS_DIGIT_RE } from 'utils/text/constants';\n\nexport default function scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n let score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n const linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n","export default function scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. 
Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n","export const DIGIT_RE = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nexport const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n","import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { range } from 'utils';\nimport {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PAGE_RE,\n} from 'utils/dom/constants';\nimport { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nfunction makeSig($link) {\n return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default 
function scoreByParents($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n let $parent = $link.parent();\n let positiveMatch = false;\n let negativeMatch = false;\n let score = 0;\n\n Array.from(range(0, 4)).forEach(() => {\n if ($parent.length === 0) {\n return;\n }\n\n const parentData = makeSig($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)\n && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n\n","import { PREV_LINK_TEXT_RE } from '../constants';\n\nexport default function scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n DIGIT_RE,\n EXTRANEOUS_LINK_HINTS_RE,\n} from '../constants';\n\nexport default function shouldScore(\n href,\n articleUrl,\n baseUrl,\n parsedUrl,\n linkText,\n previousUrls\n) {\n // skip if we've already fetched this url\n if (previousUrls.find(url => href === url) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n const { hostname } = parsedUrl;\n const { hostname: linkHost } = URL.parse(href);\n\n // Domain mismatch.\n if (linkHost !== hostname) {\n return 
false;\n }\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n const fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n\n","export default function scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { NEXT_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n","import {\n NEXT_LINK_TEXT_RE,\n CAP_LINK_TEXT_RE,\n} from '../constants';\n\nexport default function scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. 
If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n","import 'babel-polyfill';\nimport URL from 'url';\n\nimport { isWordpress } from 'utils/dom';\nimport {\n removeAnchor,\n pageNumFromUrl,\n} from 'utils/text';\n\nimport {\n scoreSimilarity,\n scoreLinkText,\n scorePageInLink,\n scoreExtraneousLinks,\n scoreByParents,\n scorePrevLink,\n shouldScore,\n scoreBaseUrl,\n scoreCapLinks,\n scoreNextLinkText,\n} from './utils';\n\nexport function makeBaseRegex(baseUrl) {\n return new RegExp(`^${baseUrl}`, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls = [],\n}) {\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n const baseRegex = makeBaseRegex(baseUrl);\n const isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n const scoredPages = links.reduce((possiblePages, link) => {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n const href = removeAnchor(link.attribs.href);\n const $link = $(link);\n const linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. 
##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText,\n href,\n };\n } else {\n possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`;\n }\n\n const possiblePage = possiblePages[href];\n const linkData = makeSig($link, linkText);\n const pageNum = pageNumFromUrl(href);\n\n let score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return Reflect.ownKeys(scoredPages).length === 0 ? null : scoredPages;\n}\n","import 'babel-polyfill';\nimport URL from 'url';\n\nimport {\n articleBaseUrl,\n removeAnchor,\n} from 'utils/text';\nimport scoreLinks from './scoring/score-links';\n\n// Looks for and returns next page url\n// for multi-page articles\nconst GenericNextPageUrlExtractor = {\n extract({ $, url, parsedUrl, previousUrls = [] }) {\n parsedUrl = parsedUrl || URL.parse(url);\n\n const articleUrl = removeAnchor(url);\n const baseUrl = articleBaseUrl(url, parsedUrl);\n\n const links = $('a[href]').toArray();\n\n const scoredLinks = scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls,\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) => {\n const scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? 
scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n },\n};\n\n\nexport default GenericNextPageUrlExtractor;\n","export const CANONICAL_META_SELECTORS = [\n 'og:url',\n];\n","import URL from 'url';\nimport { extractFromMeta } from 'utils/dom';\n\nimport { CANONICAL_META_SELECTORS } from './constants';\n\nfunction parseDomain(url) {\n const parsedUrl = URL.parse(url);\n const { hostname } = parsedUrl;\n return hostname;\n}\n\nfunction result(url) {\n return {\n url,\n domain: parseDomain(url),\n };\n}\n\nconst GenericUrlExtractor = {\n extract({ $, url, metaCache }) {\n const $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n const href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n const metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n },\n\n};\n\nexport default GenericUrlExtractor;\n","export const EXCERPT_META_SELECTORS = [\n 'og:description',\n 'twitter:description',\n];\n","import ellipsize from 'ellipsize';\n\nimport {\n extractFromMeta,\n stripTags,\n} from 'utils/dom';\n\nimport { EXCERPT_META_SELECTORS } from './constants';\n\nexport function clean(content, $, maxLength = 200) {\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nconst GenericExcerptExtractor = {\n extract({ $, content, metaCache }) {\n const excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n const maxLength = 200;\n const shortContent = content.slice(0, maxLength * 5);\n return clean($(shortContent).text(), $, maxLength);\n },\n};\n\nexport default GenericExcerptExtractor;\n","import cheerio from 
'cheerio';\n\nimport { normalizeSpaces } from 'utils/text';\n\nconst GenericWordCountExtractor = {\n extract({ content }) {\n const $ = cheerio.load(content);\n\n const text = normalizeSpaces($('div').first().text());\n return text.split(/\\s/).length;\n },\n};\n\nexport default GenericWordCountExtractor;\n","import cheerio from 'cheerio';\nimport stringDirection from 'string-direction';\n\nimport GenericContentExtractor from './content/extractor';\nimport GenericTitleExtractor from './title/extractor';\nimport GenericAuthorExtractor from './author/extractor';\nimport GenericDatePublishedExtractor from './date-published/extractor';\nimport GenericDekExtractor from './dek/extractor';\nimport GenericLeadImageUrlExtractor from './lead-image-url/extractor';\nimport GenericNextPageUrlExtractor from './next-page-url/extractor';\nimport GenericUrlExtractor from './url/extractor';\nimport GenericExcerptExtractor from './excerpt/extractor';\nimport GenericWordCountExtractor from './word-count/extractor';\n\nconst GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: ({ title }) => stringDirection.getDirection(title),\n\n extract(options) {\n const { html } = options;\n\n if (html) {\n const $ = cheerio.load(html);\n options.$ = $;\n }\n\n const title = this.title(options);\n const date_published = this.date_published(options);\n const author = this.author(options);\n const content = this.content({ ...options, title });\n const 
lead_image_url = this.lead_image_url({ ...options, content });\n const dek = this.dek({ ...options, content });\n const next_page_url = this.next_page_url(options);\n const excerpt = this.excerpt({ ...options, content });\n const word_count = this.word_count({ ...options, content });\n const direction = this.direction({ title });\n const { url, domain } = this.url_and_domain(options);\n\n return {\n title,\n author,\n date_published: date_published || null,\n dek,\n lead_image_url,\n content,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default GenericExtractor;\n","import URL from 'url';\n\nimport Extractors from './all';\nimport GenericExtractor from './generic';\n\nexport default function getExtractor(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(url);\n const { hostname } = parsedUrl;\n const baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;\n}\n","export const ATTR_RE = /\\[([\\w-]+)\\]/;\n","import 'babel-polyfill';\n\nimport Cleaners from 'cleaners';\nimport { convertNodeTo } from 'utils/dom';\nimport GenericExtractor from './generic';\nimport { ATTR_RE } from './constants';\n\n// Remove elements by an array of selectors\nexport function cleanBySelectors($content, $, { clean }) {\n if (!clean) return null;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nexport function transformElements($content, $, { transforms }) {\n if (!transforms) return null;\n\n Reflect.ownKeys(transforms).forEach((key) => {\n const $matches = $(key, $content);\n const value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each((index, node) => {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each((index, node) => {\n 
const result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nexport function select(opts) {\n const { $, type, extractionOpts, extractHtml = false } = opts;\n // Skip if there's not extraction for this type\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n const { selectors, defaultCleaner = true } = extractionOpts;\n\n const matchingSelector = selectors.find(selector => $(selector).length === 1 && $(selector).text().trim() !== '');\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n if (extractHtml) {\n let $content = $(matchingSelector);\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n if (defaultCleaner) {\n $content = Cleaners[type]($content, opts);\n }\n\n return $.html($content);\n }\n // if selector includes an attr (e.g., img[src]),\n // extract the attr\n const attr = matchingSelector.match(ATTR_RE);\n let result;\n\n if (attr) {\n result = $(matchingSelector).attr(attr[1]);\n } else {\n // otherwise use the text of the node\n result = $(matchingSelector).text();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return Cleaners[type](result, opts);\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n const { type, extractor } = opts;\n\n // If 
nothing matches the selector,\n // run the Generic extraction\n return select({ ...opts, extractionOpts: extractor[type] }) ||\n GenericExtractor[type](opts);\n}\n\nconst RootExtractor = {\n extract(extractor = GenericExtractor, opts) {\n const { contentOnly, extractedTitle } = opts;\n // This is the generic extractor. Run its extract method\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = {\n ...opts,\n extractor,\n };\n\n if (contentOnly) {\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title: extractedTitle,\n });\n return {\n content,\n };\n }\n const title = extractResult({ ...opts, type: 'title' });\n const date_published = extractResult({ ...opts, type: 'date_published' });\n const author = extractResult({ ...opts, type: 'author' });\n const next_page_url = extractResult({ ...opts, type: 'next_page_url' });\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title,\n });\n const lead_image_url = extractResult({ ...opts, type: 'lead_image_url', content });\n const dek = extractResult({ ...opts, type: 'dek', content });\n const excerpt = extractResult({ ...opts, type: 'excerpt', content });\n const word_count = extractResult({ ...opts, type: 'word_count', content });\n const direction = extractResult({ ...opts, type: 'direction', title });\n const { url, domain } = extractResult({ ...opts, type: 'url_and_domain' });\n\n return {\n title,\n content,\n author,\n date_published,\n lead_image_url,\n dek,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default RootExtractor;\n","import 'babel-polyfill';\nimport { removeAnchor } from 'utils/text';\nimport RootExtractor from 'extractors/root-extractor';\nimport Resource from 'resource';\n\nexport default async function collectAllPages(\n {\n next_page_url,\n html,\n $,\n metaCache,\n result,\n Extractor,\n title,\n url,\n }\n) {\n // At this point, we've fetched just the first 
page\n let pages = 1;\n const previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n while (next_page_url && pages < 26) {\n pages += 1;\n $ = await Resource.create(next_page_url);\n html = $.html();\n\n const extractorOpts = {\n url: next_page_url,\n html,\n $,\n metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls,\n };\n\n const nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n previousUrls.push(next_page_url);\n result = {\n ...result,\n content: `\n ${result.content}\n \n
Page ${pages}
\n ${nextPageResult.content}\n `,\n };\n\n next_page_url = nextPageResult.next_page_url;\n }\n\n return {\n ...result,\n total_pages: pages,\n pages_rendered: pages,\n };\n}\n","import URL from 'url';\n\nimport Resource from 'resource';\nimport {\n validateUrl,\n Errors,\n} from 'utils';\nimport getExtractor from 'extractors/get-extractor';\nimport RootExtractor from 'extractors/root-extractor';\nimport collectAllPages from 'extractors/collect-all-pages';\n\nconst Iris = {\n async parse(url, html, opts = {}) {\n const { fetchAllPages = true } = opts || true;\n\n const parsedUrl = URL.parse(url);\n\n if (!validateUrl(parsedUrl)) {\n return Errors.badUrl;\n }\n\n const Extractor = getExtractor(url, parsedUrl);\n // console.log(`Using extractor for ${Extractor.domain}`);\n\n const $ = await Resource.create(url, html, parsedUrl);\n\n // If we found an error creating the resource, return that error\n if ($.error) {\n return $;\n }\n\n html = $.html();\n\n // Cached value of every meta name in our document.\n // Used when extracting title/author/date_published/dek\n const metaCache = $('meta').map((_, node) => $(node).attr('name')).toArray();\n\n let result = RootExtractor.extract(Extractor, { url, html, $, metaCache, parsedUrl });\n const { title, next_page_url } = result;\n\n // Fetch more pages if next_page_url found\n if (fetchAllPages && next_page_url) {\n result = await collectAllPages(\n {\n Extractor,\n next_page_url,\n html,\n $,\n metaCache,\n result,\n title,\n url,\n }\n );\n } else {\n result = {\n ...result,\n total_pages: 1,\n rendered_pages: 1,\n };\n }\n\n return result;\n },\n\n};\n\nexport default 
Iris;\n"],"names":["range","start","end","validateUrl","hostname","Errors","REQUEST_HEADERS","FETCH_TIMEOUT","BAD_CONTENT_TYPES","BAD_CONTENT_TYPES_RE","RegExp","join","MAX_CONTENT_LENGTH","get","options","Promise","resolve","reject","err","response","body","validateResponse","parseNon2xx","statusMessage","statusCode","Error","error","headers","contentType","contentLength","test","url","parsedUrl","URL","parse","encodeURI","badUrl","fetchResource","convertMetaProp","$","from","to","each","_","node","$node","value","attr","removeAttr","normalizeMetaTags","IS_LINK","IS_IMAGE","TAGS_TO_REMOVE","convertLazyLoadedImages","img","ownKeys","attribs","forEach","isComment","index","type","cleanComments","root","find","contents","filter","remove","clean","Resource","preparedResponse","validResponse","result","generateDoc","content","includes","cheerio","load","normalizeWhitespace","children","length","NYMagExtractor","$children","tagName","BloggerExtractor","WikipediaExtractor","$parent","parents","prepend","TwitterExtractor","tweets","$tweetContainer","append","replaceWith","Extractors","SPACER_RE","STRIP_OUTPUT_TAGS","REMOVE_ATTRS","REMOVE_ATTR_SELECTORS","map","selector","REMOVE_ATTR_LIST","WHITELIST_ATTRS","WHITELIST_ATTRS_RE","REMOVE_EMPTY_TAGS","REMOVE_EMPTY_SELECTORS","tag","CLEAN_CONDITIONALLY_TAGS","HEADER_TAGS","HEADER_TAG_LIST","UNLIKELY_CANDIDATES_BLACKLIST","UNLIKELY_CANDIDATES_WHITELIST","DIV_TO_P_BLOCK_TAGS","NON_TOP_CANDIDATE_TAGS","NON_TOP_CANDIDATE_TAGS_RE","PHOTO_HINTS","PHOTO_HINTS_RE","POSITIVE_SCORE_HINTS","POSITIVE_SCORE_RE","NEGATIVE_SCORE_HINTS","NEGATIVE_SCORE_RE","IS_WP_SELECTOR","EXTRANEOUS_LINK_HINTS","EXTRANEOUS_LINK_HINTS_RE","PAGE_RE","BLOCK_LEVEL_TAGS","BLOCK_LEVEL_TAGS_RE","candidatesBlacklist","CANDIDATES_BLACKLIST","candidatesWhitelist","CANDIDATES_WHITELIST","stripUnlikelyCandidates","not","classes","id","classAndId","brsToPs","collapsing","element","nextElement","next","paragraphize","br","sibling","nextSibling","p","appendTo","convertDivs
","div","$div","convertable","convertSpans","span","$span","convertToParagraphs","convertNodeTo","attribString","Reflect","key","cleanForHeight","$img","height","parseInt","width","removeSpacers","cleanImages","$article","stripJunkTags","article","cleanHOnes","$hOnes","removeAllButWhitelist","reduce","acc","cleanAttributes","removeEmpty","$p","text","trim","HNEWS_CONTENT_SELECTORS","READABILITY_ASSET","PARAGRAPH_SCORE_TAGS","CHILD_CONTENT_TAGS","BAD_TAGS","getWeight","score","getScore","parseFloat","scoreCommas","match","idkRe","scoreLength","textLength","chunks","lengthBonus","Math","min","max","scoreParagraph","slice","setScore","addScore","amount","getOrInitScore","e","addToParent","parent","weightNodes","scoreNode","addScoreTo","scorePs","rawScore","scoreContent","parentSelector","childSelector","NORMALIZE_RE","normalizeSpaces","replace","extractFromUrl","regexList","matchRe","re","exec","PAGE_IN_HREF_RE","HAS_ALPHA_RE","IS_ALPHA_RE","IS_DIGIT_RE","pageNumFromUrl","matches","pageNum","removeAnchor","split","isGoodSegment","segment","firstSegmentHasLetters","goodSegment","toLowerCase","articleBaseUrl","parsed","protocol","host","path","cleanedSegments","reverse","rawSegment","possibleSegment","fileExt","push","SENTENCE_END_RE","hasSentenceEnd","mergeSiblings","$candidate","topScore","siblingScoreThreshold","wrappingDiv","$sibling","siblingScore","contentBonus","density","linkDensity","newScore","siblingContent","siblingContentLength","findTopCandidate","first","removeUnlessContent","weight","hasClass","pCount","inputCount","imgCount","nodeIsList","previousNode","prev","scriptCount","cleanTags","cleanHeaders","title","header","$header","prevAll","rewriteTopLevel","absolutize","rootUrl","$content","absoluteUrl","makeLinksAbsolute","totalTextLength","linkText","linkLength","extractFromMeta","metaNames","cachedNames","foundNames","indexOf","name","nodes","values","toArray","metaValue","stripTags","isGoodNode","maxChildren","withinComment","extractFromSelectors","sele
ctors","textOnly","html","cleanText","commentParent","class","undefined","nodeIsSufficient","isWordpress","CLEAN_AUTHOR_RE","TEXT_LINK_RE","MS_DATE_STRING","SEC_DATE_STRING","CLEAN_DATE_STRING_RE","TIME_MERIDIAN_SPACE_RE","TIME_MERIDIAN_DOTS_RE","months","allMonths","timestamp1","timestamp2","SPLIT_DATE_STRING","TITLE_SPLITTERS_RE","DOMAIN_ENDINGS_RE","cleanAuthor","author","leadImageUrl","validUrl","isWebUri","cleanDek","dek","dekText","cleanDateString","dateString","cleanDatePublished","date","moment","Date","isValid","toISOString","extractCleanNode","cleanConditionally","cleanTitle","resolveSplitTitle","h1","extractBreadcrumbTitle","splitTitle","termCounts","titleText","maxTerm","termCount","splitEnds","longestEnd","cleanDomainFromTitle","nakedDomain","startSlug","startSlugRatio","wuzzy","levenshtein","endSlug","endSlugRatio","newTitle","Cleaners","cleanImage","cleanContent","extractBestNode","opts","$topCandidate","GenericContentExtractor","defaultOpts","getContentNode","cleanAndReturnNode","k","STRONG_TITLE_META_TAGS","WEAK_TITLE_META_TAGS","STRONG_TITLE_SELECTORS","WEAK_TITLE_SELECTORS","GenericTitleExtractor","metaCache","AUTHOR_META_TAGS","AUTHOR_MAX_LENGTH","AUTHOR_SELECTORS","bylineRe","BYLINE_SELECTORS_RE","GenericAuthorExtractor","regex","DATE_PUBLISHED_META_TAGS","DATE_PUBLISHED_SELECTORS","abbrevMonthsStr","DATE_PUBLISHED_URL_RES","GenericDatePublishedExtractor","datePublished","GenericDekExtractor","LEAD_IMAGE_URL_META_TAGS","LEAD_IMAGE_URL_SELECTORS","POSITIVE_LEAD_IMAGE_URL_HINTS","POSITIVE_LEAD_IMAGE_URL_HINTS_RE","NEGATIVE_LEAD_IMAGE_URL_HINTS","NEGATIVE_LEAD_IMAGE_URL_HINTS_RE","GIF_RE","JPG_RE","getSig","scoreImageUrl","scoreAttr","scoreByParents","$figParent","$gParent","scoreBySibling","scoreByDimensions","src","area","round","scoreByPosition","$imgs","GenericLeadImageUrlExtractor","cleanUrl","imageUrl","imgs","imgScores","topUrl","href","scoreSimilarity","articleUrl","similarity","difflib","SequenceMatcher","ratio","diffPercent","diffModifier
","scoreLinkText","linkTextAsNum","scorePageInLink","isWp","DIGIT_RE","NEXT_LINK_TEXT_RE","CAP_LINK_TEXT_RE","PREV_LINK_TEXT_RE","scoreExtraneousLinks","makeSig","$link","positiveMatch","negativeMatch","parentData","scorePrevLink","linkData","shouldScore","baseUrl","previousUrls","linkHost","fragment","scoreBaseUrl","baseRegex","scoreNextLinkText","scoreCapLinks","makeBaseRegex","scoreLinks","links","scoredPages","possiblePages","link","possiblePage","GenericNextPageUrlExtractor","scoredLinks","topPage","scoredLink","CANONICAL_META_SELECTORS","parseDomain","GenericUrlExtractor","$canonical","metaUrl","EXCERPT_META_SELECTORS","maxLength","ellipsize","ellipse","GenericExcerptExtractor","excerpt","shortContent","GenericWordCountExtractor","GenericExtractor","extract","bind","stringDirection","getDirection","date_published","lead_image_url","next_page_url","word_count","direction","url_and_domain","domain","getExtractor","baseDomain","ATTR_RE","cleanBySelectors","transformElements","transforms","$matches","select","extractionOpts","extractHtml","defaultCleaner","matchingSelector","wrap","extractResult","extractor","RootExtractor","contentOnly","extractedTitle","Extractor","pages","create","extractorOpts","nextPageResult","collectAllPages","Iris","fetchAllPages"],"mappings":";;;;;;;;;;;;;;;eAAyBA;;AAAzB,AAAe,SAAUA,KAAV;MAAgBC,KAAhB,yDAAwB,CAAxB;MAA2BC,GAA3B,yDAAiC,CAAjC;;;;;gBACND,SAASC,GADH;;;;;;iBAELD,SAAS,CAFJ;;;;;;;;;;;;;;ACAf;AACA,AAAe,SAASE,WAAT,OAAmC;MAAZC,QAAY,QAAZA,QAAY;;;SAEzC,CAAC,CAACA,QAAT;;;ACHF,IAAMC,SAAS;UACL;WACC,IADD;cAEI;;CAHd,CAOA;;ACPO,IAAMC,kBAAkB;gBACf;CADT;;;AAKP,AAAO,IAAMC,gBAAgB,KAAtB;;;AAGP,IAAMC,oBAAoB,CACxB,YADwB,EAExB,WAFwB,EAGxB,YAHwB,EAIxB,WAJwB,CAA1B;;AAOA,AAAO,IAAMC,uBAAuB,IAAIC,MAAJ,QAAgBF,kBAAkBG,IAAlB,CAAuB,GAAvB,CAAhB,SAAiD,GAAjD,CAA7B;;;;AAKP,AAAO,IAAMC,qBAAqB,OAA3B,CAEP,AAIA,AAKA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AClBA,SAASC,GAAT,CAAaC,OAAb,EAAsB;SACb,IAAIC,OAAJ,C
AAY,UAACC,OAAD,EAAUC,MAAV,EAAqB;YAC9BH,OAAR,EAAiB,UAACI,GAAD,EAAMC,QAAN,EAAgBC,IAAhB,EAAyB;UACpCF,GAAJ,EAAS;eACAA,GAAP;OADF,MAEO;gBACG,EAAEE,UAAF,EAAQD,kBAAR,EAAR;;KAJJ;GADK,CAAP;;;;;;;;AAgBF,AAAO,SAASE,gBAAT,CAA0BF,QAA1B,EAAyD;MAArBG,WAAqB,yDAAP,KAAO;;;MAE1DH,SAASI,aAAT,KAA2B,IAA/B,EAAqC;QAC/B,CAACJ,SAASK,UAAd,EAA0B;YAClB,IAAIC,KAAJ,sDAC+CN,SAASO,KADxD,CAAN;KADF,MAIO,IAAI,CAACJ,WAAL,EAAkB;YACjB,IAAIG,KAAJ,kDAC2CN,SAASK,UADpD,wEAAN;;;;0BASAL,SAASQ,OAjBiD;MAe5CC,WAf4C,qBAe5D,cAf4D;MAgB1CC,aAhB0C,qBAgB5D,gBAhB4D;;;;MAoB1DpB,qBAAqBqB,IAArB,CAA0BF,WAA1B,CAAJ,EAA4C;UACpC,IAAIH,KAAJ,yCACkCG,WADlC,0BAAN;;;;MAMEC,gBAAgBjB,kBAApB,EAAwC;UAChC,IAAIa,KAAJ,yEACkEb,kBADlE,OAAN;;;SAKK,IAAP;;;AAGF,AAMA;;;;;;AAMA;uDAAe,iBAA6BmB,GAA7B,EAAkCC,SAAlC;;;;;;;wBACDA,aAAaC,IAAIC,KAAJ,CAAUC,UAAUJ,GAAV,CAAV,CAAzB;;mBADa,GAGG;mBACTC,SADS;oCAEA1B,eAAd,CAFc;uBAGLC,aAHK;;;wBAMJ,IANI;;mBAQT,IARS;;oBAUR,IAVQ;;kCAYM;aAfT;;mBAkBoBM,IAAIC,OAAJ,CAlBpB;;;;oBAAA,SAkBLK,QAlBK;gBAAA,SAkBKC,IAlBL;;;6BAqBMD,QAAjB;6CACO,EAAEC,UAAF,EAAQD,kBAAR,EAtBI;;;;;6CAwBJd,OAAO+B,MAxBH;;;;;;;;GAAf;;WAA8BC,aAA9B;;;;SAA8BA,aAA9B;;;AC9EA,SAASC,eAAT,CAAyBC,CAAzB,EAA4BC,IAA5B,EAAkCC,EAAlC,EAAsC;cAC1BD,IAAV,QAAmBE,IAAnB,CAAwB,UAACC,CAAD,EAAIC,IAAJ,EAAa;QAC7BC,QAAQN,EAAEK,IAAF,CAAd;;QAEME,QAAQD,MAAME,IAAN,CAAWP,IAAX,CAAd;UACMO,IAAN,CAAWN,EAAX,EAAeK,KAAf;UACME,UAAN,CAAiBR,IAAjB;GALF;;SAQOD,CAAP;;;;;;;;;;AAUF,AAAe,SAASU,iBAAT,CAA2BV,CAA3B,EAA8B;MACvCD,gBAAgBC,CAAhB,EAAmB,SAAnB,EAA8B,OAA9B,CAAJ;MACID,gBAAgBC,CAAhB,EAAmB,UAAnB,EAA+B,MAA/B,CAAJ;SACOA,CAAP;;;ACtBK,IAAMW,UAAU,IAAIxC,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAhB;AACP,AAAO,IAAMyC,WAAW,IAAIzC,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB;;AAEP,AAAO,IAAM0C,iBAAiB,CAC5B,QAD4B,EAE5B,OAF4B,EAG5B,MAH4B,EAI5BzC,IAJ4B,CAIvB,GAJuB,CAAvB;;ACIP;;;;;AAKA,AAAe,SAAS0C,uBAAT,CAAiCd,CAAjC,EAAoC;IAC/C,KAAF,EAASG,IAAT,CAAc,UAACC,CAAD,EAAIW,GAAJ,EAAY;YAChBC,OAAR,CAAgBD,IAAIE,OAApB,EAA6BC,OAA7B,CAAqC,UAACV,IAAD,EAAU;UACvCD,QAAQQ,IAAIE,OAAJ,CAAYT,IAAZ,CAAd;;UAEIA,SAAS,KAAT,IAAkBG,QAAQpB,IAAR,CAAagB,KAAb,CAAlB,IACAK,
SAASrB,IAAT,CAAcgB,KAAd,CADJ,EAC0B;UACtBQ,GAAF,EAAOP,IAAP,CAAY,KAAZ,EAAmBD,KAAnB;;KALJ;GADF;;SAWOP,CAAP;;;ACtBF,SAASmB,SAAT,CAAmBC,KAAnB,EAA0Bf,IAA1B,EAAgC;SACvBA,KAAKgB,IAAL,KAAc,SAArB;;;AAGF,SAASC,aAAT,CAAuBtB,CAAvB,EAA0B;IACtBuB,IAAF,GAASC,IAAT,CAAc,GAAd,EACSC,QADT,GAESC,MAFT,CAEgBP,SAFhB,EAGSQ,MAHT;;SAKO3B,CAAP;;;AAGF,AAAe,SAAS4B,KAAT,CAAe5B,CAAf,EAAkB;IAC7Ba,cAAF,EAAkBc,MAAlB;;MAEIL,cAActB,CAAd,CAAJ;SACOA,CAAP;;;ACRF,IAAM6B,WAAW;;;;;;;;QAAA,kBAQFrC,GARE,EAQGsC,gBARH,EAQqBrC,SARrB,EAQgC;;;;;;;;;oBAAA;;mBAGzCqC,gBAHyC;;;;;2BAAA,GAIrB;+BACL,IADK;4BAER,GAFQ;yBAGX;kCACS,WADT;oCAEW;;eATqB;;;uBAalC,EAAEjD,MAAMiD,gBAAR,EAA0BlD,UAAUmD,aAApC,EAAT;;;;;;qBAEejC,cAAcN,GAAd,EAAmBC,SAAnB,CAf4B;;;oBAAA;;;mBAkBzCuC,OAAO7C,KAlBkC;;;;;+CAmBpC6C,MAnBoC;;;+CAsBtC,MAAKC,WAAL,CAAiBD,MAAjB,CAtBsC;;;;;;;;;GARhC;aAAA,6BAiC0B;QAArBE,OAAqB,QAA3BrD,IAA2B;QAAZD,QAAY,QAAZA,QAAY;QACfS,WADe,GACCT,SAASQ,OADV,CAC/B,cAD+B;;;;;QAKnC,CAACC,YAAY8C,QAAZ,CAAqB,MAArB,CAAD,IACA,CAAC9C,YAAY8C,QAAZ,CAAqB,MAArB,CADL,EACmC;YAC3B,IAAIjD,KAAJ,CAAU,qCAAV,CAAN;;;QAGEc,IAAIoC,QAAQC,IAAR,CAAaH,OAAb,EAAsB,EAAEI,qBAAqB,IAAvB,EAAtB,CAAR;;QAEItC,EAAEuB,IAAF,GAASgB,QAAT,GAAoBC,MAApB,KAA+B,CAAnC,EAAsC;YAC9B,IAAItD,KAAJ,CAAU,kCAAV,CAAN;;;QAGEwB,kBAAkBV,CAAlB,CAAJ;QACIc,wBAAwBd,CAAxB,CAAJ;QACI4B,MAAM5B,CAAN,CAAJ;;WAEOA,CAAP;;CArDJ,CAyDA;;ACpEA,IAAMyC,iBAAiB;UACb,WADa;WAEZ;;eAEI,CACT,qBADS,EAET,cAFS,EAGT,iBAHS,CAFJ;;;WASA,CACL,KADK,EAEL,uBAFK,CATA;;;;;;;;gBAoBK;;UAEN,IAFM;;;gBAKA,kBAACnC,KAAD,EAAW;YACboC,YAAYpC,MAAMiC,QAAN,EAAlB;YACIG,UAAUF,MAAV,KAAqB,CAArB,IAA0BE,UAAUpE,GAAV,CAAc,CAAd,EAAiBqE,OAAjB,KAA6B,KAA3D,EAAkE;iBACzD,QAAP;;;eAGK,IAAP;;;GAjCe;;SAsCd;eACM,CACT,uBADS,EAET,qBAFS,EAGT,IAHS;GAvCQ;;UA8Cb;eACK,CACT,aADS,EAET,sBAFS;GA/CQ;;OAqDhB;eACQ,CACT,sBADS;GAtDQ;;kBA2DL;eACH,CACT,kCADS,EAET,wBAFS;;CA5Df,CAmEA;;ACnEA,IAAMC,mBAAmB;UACf,cADe;WAEd;;;;eAII,CACT,wBADS,CAJJ;;;WASA,EATA;;;gBAaK;gBACA;;GAhBS;;UAoBf;eACK,CACT,mBADS;GArBU;;SA0BhB;eACM,CACT,UADS;GA3BU;;kBAgCP;eACH,CACT,kBADS;;CAjCf,CAuCA;;ACvCA,IAAMC,qBAAqB;UACjB,
eADiB;WAEhB;eACI,CACT,kBADS,CADJ;;oBAKS,KALT;;;gBAQK;sBACM,oBAACvC,KAAD,EAAW;YACnBwC,UAAUxC,MAAMyC,OAAN,CAAc,UAAd,CAAhB;;YAEID,QAAQP,QAAR,CAAiB,KAAjB,EAAwBC,MAAxB,KAAmC,CAAvC,EAA0C;kBAChCQ,OAAR,CAAgB1C,KAAhB;;OALM;0BAQU,YARV;kBASE;KAjBP;;;WAqBA,CACL,iBADK,EAEL,oCAFK,EAGL,MAHK;;GAvBgB;;UA+BjB,wBA/BiB;;SAiClB;eACM,CACT,UADS;GAlCY;;kBAuCT;eACH,CACT,sBADS;;;CAxCf,CA+CA;;AC/CA,IAAM2C,mBAAmB;UACf,aADe;;WAGd;gBACK;;;;;+BAKe,2BAAC3C,KAAD,EAAQN,CAAR,EAAc;YAC/BkD,SAAS5C,MAAMkB,IAAN,CAAW,QAAX,CAAf;YACM2B,kBAAkBnD,EAAE,iCAAF,CAAxB;wBACgBoD,MAAhB,CAAuBF,MAAvB;cACMG,WAAN,CAAkBF,eAAlB;OATQ;;;;SAcP;KAfE;;eAkBI,CACT,uBADS,CAlBJ;;oBAsBS,KAtBT;;WAwBA,CACL,qBADK,EAEL,QAFK,EAGL,sBAHK;GA3Bc;;UAkCf;eACK,CACT,kCADS;GAnCU;;kBAwCP;eACH,CACT,4CADS;;;CAzCf,CAkDA;;AC7CA,IAAMG,aAAa;eACJb,cADI;kBAEDG,gBAFC;mBAGAC,kBAHA;iBAIFI;CAJjB,CAOA;;ACZA;AACA,AAAO,IAAMM,YAAY,IAAIpF,MAAJ,CAAW,gCAAX,EAA6C,GAA7C,CAAlB;;;AAGP,AAAO,IAAMqF,oBAAoB,CAC/B,OAD+B,EAE/B,QAF+B,EAG/B,UAH+B,EAI/B,MAJ+B,EAK/B,OAL+B,EAM/B,IAN+B,EAO/B,OAP+B,EAQ/B,QAR+B,EAS/B,QAT+B,CAA1B;;;AAaP,AAAO,IAAMC,eAAe,CAAC,OAAD,EAAU,OAAV,CAArB;AACP,AAAO,IAAMC,wBAAwBD,aAAaE,GAAb,CAAiB;eAAgBC,QAAhB;CAAjB,CAA9B;AACP,AAAO,IAAMC,mBAAmBJ,aAAarF,IAAb,CAAkB,GAAlB,CAAzB;AACP,AAAO,IAAM0F,kBAAkB,CAAC,KAAD,EAAQ,MAAR,EAAgB,OAAhB,EAAyB,IAAzB,EAA+B,OAA/B,CAAxB;AACP,AAAO,IAAMC,qBAAqB,IAAI5F,MAAJ,QAAgB2F,gBAAgB1F,IAAhB,CAAqB,GAArB,CAAhB,SAA+C,GAA/C,CAA3B;;;AAGP,AAAO,IAAM4F,oBAAoB,CAAC,GAAD,CAA1B;AACP,AAAO,IAAMC,yBAAyBD,kBAAkBL,GAAlB,CAAsB;SAAUO,GAAV;CAAtB,EAA6C9F,IAA7C,CAAkD,GAAlD,CAA/B;;;AAGP,AAAO,IAAM+F,2BAA2B,CAAC,IAAD,EAAO,IAAP,EAAa,OAAb,EAAsB,KAAtB,EAA6B,QAA7B,EAAuC,MAAvC,EAA+C/F,IAA/C,CAAoD,GAApD,CAAjC;;;AAGP,IAAMgG,cAAc,CAAC,IAAD,EAAO,IAAP,EAAa,IAAb,EAAmB,IAAnB,EAAyB,IAAzB,CAApB;AACA,AAAO,IAAMC,kBAAkBD,YAAYhG,IAAZ,CAAiB,GAAjB,CAAxB;;;;;;;;AASP,AAAO,IAAMkG,gCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C;;AAgB3C,QAhB2C,EAiB3C,QAjB2C,EAk
B3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,UAvB2C,EAwB3C,OAxB2C,EAyB3C,YAzB2C,EA0B3C,UA1B2C;AA2B3C,2BA3B2C;AA4B3C,OA5B2C,EA6B3C,eA7B2C,EA8B3C,SA9B2C,EA+B3C,QA/B2C,EAgC3C,QAhC2C,EAiC3C,KAjC2C,EAkC3C,OAlC2C,EAmC3C,UAnC2C,EAoC3C,SApC2C,EAqC3C,UArC2C,EAsC3C,SAtC2C,EAuC3C,SAvC2C,EAwC3C,OAxC2C,CAAtC;;;;;;;;;;;;;AAsDP,AAAO,IAAMC,gCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,sBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjCpG,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAMqG,yBAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,4BACX,IAAIvG,MAAJ,QAAgBsG,uBAAuBrG,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;AAGP,AAYA,AAAO,IAAMuG,cAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,iBAAiB,IAAIzG,MAAJ,CAAWwG,YAAYvG,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAOP,AAAO,IAAMyG,uBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,oBAAoB,IAAI3G,MAAJ,CAAW0G,qBAAqBzG,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;AAEP,AAGA;;;;AAIA,AAAO,IAAM2G,uBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,
EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,oBAAoB,IAAI7G,MAAJ,CAAW4G,qBAAqB3G,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAM6G,iBAAiB,wCAAvB;;AAEP,AAGA;;AAEA,AAAO,IAAMC,wBAAwB,CACnC,OADmC,EAEnC,SAFmC,EAGnC,SAHmC,EAInC,SAJmC,EAKnC,QALmC,EAMnC,OANmC,EAOnC,OAPmC,EAQnC,OARmC,EASnC,KATmC,EAUnC,OAVmC,EAWnC,MAXmC,EAYnC,QAZmC,EAanC,KAbmC,EAcnC,iBAdmC,CAA9B;AAgBP,AAAO,IAAMC,2BAA2B,IAAIhH,MAAJ,CAAW+G,sBAAsB9G,IAAtB,CAA2B,GAA3B,CAAX,EAA4C,GAA5C,CAAjC;;;AAGP,AAAO,IAAMgH,UAAU,IAAIjH,MAAJ,CAAW,iBAAX,EAA8B,GAA9B,CAAhB;;AAEP,AAMA,AAIA,AAIA,AAGA,AAGA;;AAEA,AAAO,IAAMkH,mBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,EAuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMC,sBAAsB,IAAInH,MAAJ,QAAgBkH,iBAAiBjH,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAOP,IAAMmH,sBAAsBjB,8BAA8BlG,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMoH,uBAAuB,IAAIrH,MAAJ,CAAWoH,mBAAX,EAAgC,GAAhC,CAA7B;;AAEP,IAAME,sBAAsBlB,8BAA8BnG,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMsH,uBAAuB,IAAIvH,MAAJ,CAAWsH,mBAAX,EAAgC,GAAhC,CAA7B,CAEP,AAGA,AACA,AACA,AAEA;;AC3Xe,SAASE,uBAAT,CAAiC3F,CAAjC,EAAoC;;;;;;;;;;IAU/C,GAAF,EAAO4F,GAAP,CAAW,GAAX,EAAgBzF,IAAhB,CAAqB,UAACiB,KAAD,EAAQf,IAAR,EAAiB;QAC9BC,QAAQN,EAAEK
,IAAF,CAAd;QACMwF,UAAUvF,MAAME,IAAN,CAAW,OAAX,CAAhB;QACMsF,KAAKxF,MAAME,IAAN,CAAW,IAAX,CAAX;QACI,CAACsF,EAAD,IAAO,CAACD,OAAZ,EAAqB;;QAEfE,cAAgBF,WAAW,EAA3B,WAAiCC,MAAM,EAAvC,CAAN;QACIJ,qBAAqBnG,IAArB,CAA0BwG,UAA1B,CAAJ,EAA2C;;KAA3C,MAEO,IAAIP,qBAAqBjG,IAArB,CAA0BwG,UAA1B,CAAJ,EAA2C;YAC1CpE,MAAN;;GAVJ;;SAcO3B,CAAP;;;AC3BF;;;;;;;;;;AAUA,AAAe,SAASgG,OAAT,CAAiBhG,CAAjB,EAAoB;MAC7BiG,aAAa,KAAjB;IACE,IAAF,EAAQ9F,IAAR,CAAa,UAACiB,KAAD,EAAQ8E,OAAR,EAAoB;QACzBC,cAAcnG,EAAEkG,OAAF,EAAWE,IAAX,GAAkB9H,GAAlB,CAAsB,CAAtB,CAApB;;QAEI6H,eAAeA,YAAYxD,OAAZ,KAAwB,IAA3C,EAAiD;mBAClC,IAAb;QACEuD,OAAF,EAAWvE,MAAX;KAFF,MAGO,IAAIsE,UAAJ,EAAgB;mBACR,KAAb;;mBAEaC,OAAb,EAAsBlG,CAAtB,EAAyB,IAAzB;;GATJ;;SAaOA,CAAP;;;ACzBF;;;;;;;;;;;AAWA,AAAe,SAASqG,YAAT,CAAsBhG,IAAtB,EAA4BL,CAA5B,EAA2C;MAAZsG,EAAY,yDAAP,KAAO;;MAClDhG,QAAQN,EAAEK,IAAF,CAAd;;MAEIiG,EAAJ,EAAQ;QACFC,UAAUlG,KAAKmG,WAAnB;QACMC,IAAIzG,EAAE,SAAF,CAAV;;;;WAIOuG,WAAW,EAAEA,QAAQ5D,OAAR,IAAmB2C,oBAAoB/F,IAApB,CAAyBgH,QAAQ5D,OAAjC,CAArB,CAAlB,EAAmF;UAC3E6D,cAAcD,QAAQC,WAA5B;QACED,OAAF,EAAWG,QAAX,CAAoBD,CAApB;gBACUD,WAAV;;;UAGInD,WAAN,CAAkBoD,CAAlB;UACM9E,MAAN;WACO3B,CAAP;;;SAGKA,CAAP;;;AC7BF,SAAS2G,WAAT,CAAqB3G,CAArB,EAAwB;IACpB,KAAF,EAASG,IAAT,CAAc,UAACiB,KAAD,EAAQwF,GAAR,EAAgB;QACtBC,OAAO7G,EAAE4G,GAAF,CAAb;QACME,cAAcD,KAAKtE,QAAL,CAAciC,mBAAd,EAAmChC,MAAnC,KAA8C,CAAlE;;QAEIsE,WAAJ,EAAiB;oBACDD,IAAd,EAAoB7G,CAApB,EAAuB,GAAvB;;GALJ;;SASOA,CAAP;;;AAGF,SAAS+G,YAAT,CAAsB/G,CAAtB,EAAyB;IACrB,MAAF,EAAUG,IAAV,CAAe,UAACiB,KAAD,EAAQ4F,IAAR,EAAiB;QACxBC,QAAQjH,EAAEgH,IAAF,CAAd;QACMF,cAAcG,MAAMlE,OAAN,CAAc,QAAd,EAAwBP,MAAxB,KAAmC,CAAvD;QACIsE,WAAJ,EAAiB;oBACDG,KAAd,EAAqBjH,CAArB,EAAwB,GAAxB;;GAJJ;;SAQOA,CAAP;;;;;;;;;;;;;;;AAeF,AAAe,SAASkH,mBAAT,CAA6BlH,CAA7B,EAAgC;MACzCgG,QAAQhG,CAAR,CAAJ;MACI2G,YAAY3G,CAAZ,CAAJ;MACI+G,aAAa/G,CAAb,CAAJ;;SAEOA,CAAP;;;AC5Ca,SAASmH,aAAT,CAAuB7G,KAAvB,EAA8BN,CAA9B,EAA4C;MAAXkE,GAAW,yDAAL,GAAK;;MACnD7D,OAAOC,MAAMhC,GAAN,CAAU,CAAV,CAAb;MACI,CAAC+B,IAAL,EAAW;WACFL,CAAP;;;mBAEkBM,MAAMhC,GAAN,CAAU,CAAV,CALqC;;MAKjD2C,OALiD,c
AKjDA,OALiD;;MAMnDmG,eAAeC,QAAQrG,OAAR,CAAgBC,OAAhB,EACQ0C,GADR,CACY;WAAU2D,GAAV,SAAiBrG,QAAQqG,GAAR,CAAjB;GADZ,EAEQlJ,IAFR,CAEa,GAFb,CAArB;;QAIMiF,WAAN,OAAsBa,GAAtB,SAA6BkD,YAA7B,SAA6C9G,MAAMmB,QAAN,EAA7C,UAAkEyC,GAAlE;SACOlE,CAAP;;;ACXF,SAASuH,cAAT,CAAwBC,IAAxB,EAA8BxH,CAA9B,EAAiC;MACzByH,SAASC,SAASF,KAAKhH,IAAL,CAAU,QAAV,CAAT,EAA8B,EAA9B,CAAf;MACMmH,QAAQD,SAASF,KAAKhH,IAAL,CAAU,OAAV,CAAT,EAA6B,EAA7B,KAAoC,EAAlD;;;;;MAKI,CAACiH,UAAU,EAAX,IAAiB,EAAjB,IAAuBE,QAAQ,EAAnC,EAAuC;SAChChG,MAAL;GADF,MAEO,IAAI8F,MAAJ,EAAY;;;;SAIZhH,UAAL,CAAgB,QAAhB;;;SAGKT,CAAP;;;;;AAKF,SAAS4H,aAAT,CAAuBJ,IAAvB,EAA6BxH,CAA7B,EAAgC;MAC1BuD,UAAUhE,IAAV,CAAeiI,KAAKhH,IAAL,CAAU,KAAV,CAAf,CAAJ,EAAsC;SAC/BmB,MAAL;;;SAGK3B,CAAP;;;AAGF,AAAe,SAAS6H,WAAT,CAAqBC,QAArB,EAA+B9H,CAA/B,EAAkC;WACtCwB,IAAT,CAAc,KAAd,EAAqBrB,IAArB,CAA0B,UAACiB,KAAD,EAAQL,GAAR,EAAgB;QAClCyG,OAAOxH,EAAEe,GAAF,CAAb;;mBAEeyG,IAAf,EAAqBxH,CAArB;kBACcwH,IAAd,EAAoBxH,CAApB;GAJF;;SAOOA,CAAP;;;ACnCa,SAAS+H,aAAT,CAAuBC,OAAvB,EAAgChI,CAAhC,EAAmC;IAC9CwD,kBAAkBpF,IAAlB,CAAuB,GAAvB,CAAF,EAA+B4J,OAA/B,EAAwCrG,MAAxC;;SAEO3B,CAAP;;;ACLF;;;;AAGA,AAAe,SAASiI,UAAT,CAAoBD,OAApB,EAA6BhI,CAA7B,EAAgC;MACvCkI,SAASlI,EAAE,IAAF,EAAQgI,OAAR,CAAf;;MAEIE,OAAO1F,MAAP,GAAgB,CAApB,EAAuB;WACdrC,IAAP,CAAY,UAACiB,KAAD,EAAQf,IAAR;aAAiBL,EAAEK,IAAF,EAAQsB,MAAR,EAAjB;KAAZ;GADF,MAEO;WACExB,IAAP,CAAY,UAACiB,KAAD,EAAQf,IAAR,EAAiB;oBACbL,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0B,IAA1B;KADF;;;SAKKA,CAAP;;;ACZF,SAASmI,qBAAT,CAA+BL,QAA/B,EAAyC;;WAE9BtG,IAAT,CAAc,GAAd,EAAmBrB,IAAnB,CAAwB,UAACiB,KAAD,EAAQf,IAAR,EAAiB;SAClCY,OAAL,GAAeoG,QAAQrG,OAAR,CAAgBX,KAAKY,OAArB,EAA8BmH,MAA9B,CAAqC,UAACC,GAAD,EAAM7H,IAAN,EAAe;UAC7DuD,mBAAmBxE,IAAnB,CAAwBiB,IAAxB,CAAJ,EAAmC;4BACrB6H,GAAZ,qBAAkB7H,IAAlB,EAAyBH,KAAKY,OAAL,CAAaT,IAAb,CAAzB;;;aAGK6H,GAAP;KALa,EAMZ,EANY,CAAf;GADF;;;;;;;;;;AAkBF,AAAe,SAASC,eAAT,CAAyBR,QAAzB,EAAmC;wBAC1BA,QAAtB;;SAEOA,QAAP;;;AC3Ba,SAASS,WAAT,CAAqBT,QAArB,EAA+B9H,CAA/B,EAAkC;WACtCwB,IAAT,CAAc,GAAd,EAAmBrB,IAAnB,CAAwB,UAACiB,KAAD,EAAQqF,CAAR,EAAc;QAC9B+B,KAAKxI,EAAEy
G,CAAF,CAAX;QACI+B,GAAGC,IAAH,GAAUC,IAAV,OAAqB,EAAzB,EAA6BF,GAAG7G,MAAH;GAF/B;;SAKO3B,CAAP;;;ACNF;;;;;;AAMA,AAAO,IAAMsE,kCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C,EAe3C,MAf2C,EAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,OAvB2C,EAwB3C,YAxB2C,EAyB3C,UAzB2C;AA0B3C,2BA1B2C;AA2B3C,OA3B2C,EA4B3C,eA5B2C,EA6B3C,SA7B2C,EA8B3C,QA9B2C,EA+B3C,QA/B2C,EAgC3C,KAhC2C,EAiC3C,OAjC2C,EAkC3C,UAlC2C,EAmC3C,SAnC2C,EAoC3C,UApC2C,EAqC3C,SArC2C,EAsC3C,OAtC2C,CAAtC;;;;;;;;;;;;;AAoDP,AAAO,IAAMC,kCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,wBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjCpG,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAMqG,2BAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,8BACX,IAAIvG,MAAJ,QAAgBsG,yBAAuBrG,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;;;;AAMP,AAAO,IAAMuK,4BAA0B,CACrC,CAAC,SAAD,EAAY,gBAAZ,CADqC,EAErC,CAAC,OAAD,EAAU,gBAAV,CAFqC,EAGrC,CAAC,QAAD,EAAW,gBAAX,CAHqC,EAIrC,CAAC,OAAD,EAAU,WAAV,CAJqC,EAKrC,CAAC,OAAD,EAAU,YAAV,CALqC,EAMrC,CAAC,OAAD,EAAU,YAAV,CANqC,CAAhC;;AASP,AAAO,IAAMhE,gBAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,mBAAiB,IAAIzG,MAAJ,CAAWwG,cAAYvG,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAOP,AAAO,IAAMyG,yBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,sBAAoB,IAAI3G,MAAJ,CAAW0
G,uBAAqBzG,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAMwK,sBAAoB,IAAIzK,MAAJ,CAAW,qBAAX,EAAkC,GAAlC,CAA1B;;;;;;AAMP,AAAO,IAAM4G,yBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,sBAAoB,IAAI7G,MAAJ,CAAW4G,uBAAqB3G,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;AAEP,AAGA,AAGA,AAGA;;AAEA,AAAO,IAAMiH,qBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,EAuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMC,wBAAsB,IAAInH,MAAJ,QAAgBkH,mBAAiBjH,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAOP,IAAMmH,wBAAsBjB,gCAA8BlG,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAEA,IAAMqH,wBAAsBlB,gCAA8BnG,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAEA,AAGA,AAAO,IAAMyK,yBAAuB,IAAI1K,MAAJ,CAAW,mBAAX,EAAgC,GAAhC,CAA7B;AACP,AAAO,IAAM2K,uBAAqB,IAAI3K,MAAJ,CAAW,4B
AAX,EAAyC,GAAzC,CAA3B;AACP,AAAO,IAAM4K,aAAW,IAAI5K,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB,CAEP;;AC3SA;AACA,AAAe,SAAS6K,SAAT,CAAmB3I,IAAnB,EAAyB;MAChCwF,UAAUxF,KAAKG,IAAL,CAAU,OAAV,CAAhB;MACMsF,KAAKzF,KAAKG,IAAL,CAAU,IAAV,CAAX;MACIyI,QAAQ,CAAZ;;MAEInD,EAAJ,EAAQ;;QAEFhB,oBAAkBvF,IAAlB,CAAuBuG,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;QAEEd,oBAAkBzF,IAAlB,CAAuBuG,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;;;MAIAD,OAAJ,EAAa;QACPoD,UAAU,CAAd,EAAiB;;;UAGXnE,oBAAkBvF,IAAlB,CAAuBsG,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;UAEEb,oBAAkBzF,IAAlB,CAAuBsG,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;;;;;;QAOAjB,iBAAerF,IAAf,CAAoBsG,OAApB,CAAJ,EAAkC;eACvB,EAAT;;;;;;;QAOE+C,oBAAkBrJ,IAAlB,CAAuBsG,OAAvB,CAAJ,EAAqC;eAC1B,EAAT;;;;SAIGoD,KAAP;;;ACpDF;;;AAGA,AAAe,SAASC,QAAT,CAAkB5I,KAAlB,EAAyB;SAC/B6I,WAAW7I,MAAME,IAAN,CAAW,OAAX,CAAX,KAAmC,IAA1C;;;ACJF;AACA,AAAe,SAAS4I,WAAT,CAAqBX,IAArB,EAA2B;SACjC,CAACA,KAAKY,KAAL,CAAW,IAAX,KAAoB,EAArB,EAAyB7G,MAAhC;;;ACFF,IAAM8G,QAAQ,IAAInL,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAd;;AAEA,AAAe,SAASoL,WAAT,CAAqBC,UAArB,EAAgD;MAAf7G,OAAe,yDAAL,GAAK;;MACvD8G,SAASD,aAAa,EAA5B;;MAEIC,SAAS,CAAb,EAAgB;QACVC,oBAAJ;;;;;;;QAOIJ,MAAM/J,IAAN,CAAWoD,OAAX,CAAJ,EAAyB;oBACT8G,SAAS,CAAvB;KADF,MAEO;oBACSA,SAAS,IAAvB;;;WAGKE,KAAKC,GAAL,CAASD,KAAKE,GAAL,CAASH,WAAT,EAAsB,CAAtB,CAAT,EAAmC,CAAnC,CAAP;;;SAGK,CAAP;;;ACjBF;;AAEA,AAAe,SAASI,cAAT,CAAwBzJ,IAAxB,EAA8B;MACvC4I,QAAQ,CAAZ;MACMR,OAAOpI,KAAKoI,IAAL,GAAYC,IAAZ,EAAb;MACMc,aAAaf,KAAKjG,MAAxB;;;MAGIgH,aAAa,EAAjB,EAAqB;WACZ,CAAP;;;;WAIOJ,YAAYX,IAAZ,CAAT;;;;WAISc,YAAYC,UAAZ,CAAT;;;;;;MAMIf,KAAKsB,KAAL,CAAW,CAAC,CAAZ,MAAmB,GAAvB,EAA4B;aACjB,CAAT;;;SAGKd,KAAP;;;AC/Ba,SAASe,QAAT,CAAkB1J,KAAlB,EAAyBN,CAAzB,EAA4BiJ,KAA5B,EAAmC;QAC1CzI,IAAN,CAAW,OAAX,EAAoByI,KAApB;SACO3I,KAAP;;;ACEa,SAAS2J,QAAT,CAAkB3J,KAAlB,EAAyBN,CAAzB,EAA4BkK,MAA5B,EAAoC;MAC7C;QACIjB,QAAQkB,eAAe7J,KAAf,EAAsBN,CAAtB,IAA2BkK,MAAzC;aACS5J,KAAT,EAAgBN,CAAhB,EAAmBiJ,KAAnB;GAFF,CAGE,OAAOmB,CAAP,EAAU;;;;SAIL9J,KAAP;;;ACXF;AACA,AAAe,SAAS+J,WAAT,CAAqBhK,IAArB,EAA2BL,CAA3B,EAA8BiJ,KAA9B,EAAqC;MAC5CqB,SAASjK,KAAKiK,MAAL,EAAf;MACIA,MAAJ,EA
AY;aACDA,MAAT,EAAiBtK,CAAjB,EAAoBiJ,QAAQ,IAA5B;;;SAGK5I,IAAP;;;ACFF;;;AAGA,AAAe,SAAS8J,cAAT,CAAwB7J,KAAxB,EAA+BN,CAA/B,EAAsD;MAApBuK,WAAoB,yDAAN,IAAM;;MAC/DtB,QAAQC,SAAS5I,KAAT,CAAZ;;MAEI2I,KAAJ,EAAW;WACFA,KAAP;;;UAGMuB,UAAUlK,KAAV,CAAR;;MAEIiK,WAAJ,EAAiB;aACNvB,UAAU1I,KAAV,CAAT;;;cAGUA,KAAZ,EAAmBN,CAAnB,EAAsBiJ,KAAtB;;SAEOA,KAAP;;;AClBF;;AAEA,AAAe,SAASuB,SAAT,CAAmBlK,KAAnB,EAA0B;mBACnBA,MAAMhC,GAAN,CAAU,CAAV,CADmB;;MAC/BqE,OAD+B,cAC/BA,OAD+B;;;;;;MAMnCkG,uBAAqBtJ,IAArB,CAA0BoD,OAA1B,CAAJ,EAAwC;WAC/BmH,eAAexJ,KAAf,CAAP;GADF,MAEO,IAAIqC,YAAY,KAAhB,EAAuB;WACrB,CAAP;GADK,MAEA,IAAImG,qBAAmBvJ,IAAnB,CAAwBoD,OAAxB,CAAJ,EAAsC;WACpC,CAAP;GADK,MAEA,IAAIoG,WAASxJ,IAAT,CAAcoD,OAAd,CAAJ,EAA4B;WAC1B,CAAC,CAAR;GADK,MAEA,IAAIA,YAAY,IAAhB,EAAsB;WACpB,CAAC,CAAR;;;SAGK,CAAP;;;ACjBF,SAASoE,cAAT,CAAsBzG,KAAtB,EAA6BN,CAA7B,EAAgC;MAC1BM,MAAMhC,GAAN,CAAU,CAAV,CAAJ,EAAkB;qBACIgC,MAAMhC,GAAN,CAAU,CAAV,CADJ;;QACRqE,OADQ,cACRA,OADQ;;;QAGZA,YAAY,MAAhB,EAAwB;;oBAERrC,KAAd,EAAqBN,CAArB,EAAwB,KAAxB;;;;;AAKN,SAASyK,UAAT,CAAoBnK,KAApB,EAA2BN,CAA3B,EAA8BiJ,KAA9B,EAAqC;MAC/B3I,KAAJ,EAAW;mBACIA,KAAb,EAAoBN,CAApB;aACSM,KAAT,EAAgBN,CAAhB,EAAmBiJ,KAAnB;;;;AAIJ,SAASyB,OAAT,CAAiB1K,CAAjB,EAAoBuK,WAApB,EAAiC;IAC7B,QAAF,EAAY3E,GAAZ,CAAgB,SAAhB,EAA2BzF,IAA3B,CAAgC,UAACiB,KAAD,EAAQf,IAAR,EAAiB;;;QAG3CC,QAAQN,EAAEK,IAAF,CAAZ;YACQ2J,SAAS1J,KAAT,EAAgBN,CAAhB,EAAmBmK,eAAe7J,KAAf,EAAsBN,CAAtB,EAAyBuK,WAAzB,CAAnB,CAAR;;QAEMzH,UAAUxC,MAAMgK,MAAN,EAAhB;QACMK,WAAWH,UAAUlK,KAAV,CAAjB;;eAEWwC,OAAX,EAAoB9C,CAApB,EAAuB2K,QAAvB,EAAiCJ,WAAjC;QACIzH,OAAJ,EAAa;;;iBAGAA,QAAQwH,MAAR,EAAX,EAA6BtK,CAA7B,EAAgC2K,WAAW,CAA3C,EAA8CJ,WAA9C;;GAbJ;;SAiBOvK,CAAP;;;;;AAKF,AAAe,SAAS4K,YAAT,CAAsB5K,CAAtB,EAA6C;MAApBuK,WAAoB,yDAAN,IAAM;;;;4BAGlCrJ,OAAxB,CAAgC,gBAAqC;;;QAAnC2J,cAAmC;QAAnBC,aAAmB;;MAC9DD,cAAL,SAAuBC,aAAvB,EAAwC3K,IAAxC,CAA6C,UAACiB,KAAD,EAAQf,IAAR,EAAiB;eACnDL,EAAEK,IAAF,EAAQiK,MAAR,CAAeO,cAAf,CAAT,EAAyC7K,CAAzC,EAA4C,EAA5C;KADF;GADF;;;;;;;UAWQA,CAAR,EAAWuK,WAAX;UACQvK,CAAR,EAAWuK,WAAX;;SAEOvK,CAAP;;;ACpEF,IAAM+K,eAAe,SAArB;;AA
EA,AAAe,SAASC,eAAT,CAAyBvC,IAAzB,EAA+B;SACrCA,KAAKwC,OAAL,CAAaF,YAAb,EAA2B,GAA3B,EAAgCrC,IAAhC,EAAP;;;ACHF;;;;;AAKA,AAAe,SAASwC,cAAT,CAAwB1L,GAAxB,EAA6B2L,SAA7B,EAAwC;MAC/CC,UAAUD,UAAU3J,IAAV,CAAe;WAAM6J,GAAG9L,IAAH,CAAQC,GAAR,CAAN;GAAf,CAAhB;MACI4L,OAAJ,EAAa;WACJA,QAAQE,IAAR,CAAa9L,GAAb,EAAkB,CAAlB,CAAP;;;SAGK,IAAP;;;ACXF;;;;;;;;;;;;;;;;AAgBA,AAAO,IAAM+L,kBAAkB,IAAIpN,MAAJ,CAAW,0EAAX,EAAuF,GAAvF,CAAxB;;AAEP,AAAO,IAAMqN,eAAe,QAArB;;AAEP,AAAO,IAAMC,cAAc,WAApB;AACP,AAAO,IAAMC,cAAc,WAApB;;ACnBQ,SAASC,cAAT,CAAwBnM,GAAxB,EAA6B;MACpCoM,UAAUpM,IAAI6J,KAAJ,CAAUkC,eAAV,CAAhB;MACI,CAACK,OAAL,EAAc,OAAO,IAAP;;MAERC,UAAUnE,SAASkE,QAAQ,CAAR,CAAT,EAAqB,EAArB,CAAhB;;;;SAIOC,UAAU,GAAV,GAAgBA,OAAhB,GAA0B,IAAjC;;;ACVa,SAASC,YAAT,CAAsBtM,GAAtB,EAA2B;SACjCA,IAAIuM,KAAJ,CAAU,GAAV,EAAe,CAAf,EAAkBd,OAAlB,CAA0B,KAA1B,EAAiC,EAAjC,CAAP;;;ACOF,SAASe,aAAT,CAAuBC,OAAvB,EAAgC7K,KAAhC,EAAuC8K,sBAAvC,EAA+D;MACzDC,cAAc,IAAlB;;;;MAII/K,QAAQ,CAAR,IAAasK,YAAYnM,IAAZ,CAAiB0M,OAAjB,CAAb,IAA0CA,QAAQzJ,MAAR,GAAiB,CAA/D,EAAkE;kBAClD,IAAd;;;;;MAKEpB,UAAU,CAAV,IAAe6K,QAAQG,WAAR,OAA0B,OAA7C,EAAsD;kBACtC,KAAd;;;;;MAKEhL,QAAQ,CAAR,IAAa6K,QAAQzJ,MAAR,GAAiB,CAA9B,IAAmC,CAAC0J,sBAAxC,EAAgE;kBAChD,KAAd;;;SAGKC,WAAP;;;;;;AAMF,AAAe,SAASE,cAAT,CAAwB7M,GAAxB,EAA6B8M,MAA7B,EAAqC;MAC5C7M,YAAY6M,UAAU5M,IAAIC,KAAJ,CAAUH,GAAV,CAA5B;MACQ+M,QAF0C,GAEjB9M,SAFiB,CAE1C8M,QAF0C;MAEhCC,IAFgC,GAEjB/M,SAFiB,CAEhC+M,IAFgC;MAE1BC,IAF0B,GAEjBhN,SAFiB,CAE1BgN,IAF0B;;;MAI9CP,yBAAyB,KAA7B;MACMQ,kBAAkBD,KAAKV,KAAL,CAAW,GAAX,EACvBY,OADuB,GAEvBvE,MAFuB,CAEhB,UAACC,GAAD,EAAMuE,UAAN,EAAkBxL,KAAlB,EAA4B;QAC9B6K,UAAUW,UAAd;;;QAGIX,QAAQ9J,QAAR,CAAiB,GAAjB,CAAJ,EAA2B;2BACU8J,QAAQF,KAAR,CAAc,GAAd,CADV;;;;UAClBc,eADkB;UACDC,OADC;;UAErBrB,YAAYlM,IAAZ,CAAiBuN,OAAjB,CAAJ,EAA+B;kBACnBD,eAAV;;;;;;QAMAtB,gBAAgBhM,IAAhB,CAAqB0M,OAArB,KAAiC7K,QAAQ,CAA7C,EAAgD;gBACpC6K,QAAQhB,OAAR,CAAgBM,eAAhB,EAAiC,EAAjC,CAAV;;;;;;;QAOEnK,UAAU,CAAd,EAAiB;+BACUoK,aAAajM,IAAb,CAAkB0M,OAAlB,CAAzB;;;;QAIED,cAAcC,OAAd,EAAuB7K,KAAvB,EAA8B8K,sBAA9B,CAAJ,EAA2D;UACrDa,IAAJ,CAASd,OAAT
;;;WAGK5D,GAAP;GAhCsB,EAiCrB,EAjCqB,CAAxB;;SAmCUkE,QAAV,UAAuBC,IAAvB,GAA8BE,gBAAgBC,OAAhB,GAA0BvO,IAA1B,CAA+B,GAA/B,CAA9B;;;AC3EF;;AAEA,IAAM4O,kBAAkB,IAAI7O,MAAJ,CAAW,QAAX,CAAxB;AACA,AAAe,SAAS8O,cAAT,CAAwBxE,IAAxB,EAA8B;SACpCuE,gBAAgBzN,IAAhB,CAAqBkJ,IAArB,CAAP;;;ACKF;;;;;AAKA,AAAe,SAASyE,aAAT,CAAuBC,UAAvB,EAAmCC,QAAnC,EAA6CpN,CAA7C,EAAgD;MACzD,CAACmN,WAAW7C,MAAX,GAAoB9H,MAAzB,EAAiC;WACxB2K,UAAP;;;MAGIE,wBAAwB1D,KAAKE,GAAL,CAAS,EAAT,EAAauD,WAAW,IAAxB,CAA9B;MACME,cAActN,EAAE,aAAF,CAApB;;aAEWsK,MAAX,GAAoB/H,QAApB,GAA+BpC,IAA/B,CAAoC,UAACiB,KAAD,EAAQmF,OAAR,EAAoB;QAChDgH,WAAWvN,EAAEuG,OAAF,CAAjB;;QAEI7B,4BAA0BnF,IAA1B,CAA+BgH,QAAQ5D,OAAvC,CAAJ,EAAqD;aAC5C,IAAP;;;QAGI6K,eAAetE,SAASqE,QAAT,CAArB;QACIC,YAAJ,EAAkB;UACZD,aAAaJ,UAAjB,EAA6B;oBACf/J,MAAZ,CAAmBmK,QAAnB;OADF,MAEO;YACDE,eAAe,CAAnB;YACMC,UAAUC,YAAYJ,QAAZ,CAAhB;;;;YAIIG,UAAU,IAAd,EAAoB;0BACF,EAAhB;;;;;YAKEA,WAAW,GAAf,EAAoB;0BACF,EAAhB;;;;;YAKEH,SAAS/M,IAAT,CAAc,OAAd,MAA2B2M,WAAW3M,IAAX,CAAgB,OAAhB,CAA/B,EAAyD;0BACvC4M,WAAW,GAA3B;;;YAGIQ,WAAWJ,eAAeC,YAAhC;;YAEIG,YAAYP,qBAAhB,EAAuC;iBAC9BC,YAAYlK,MAAZ,CAAmBmK,QAAnB,CAAP;SADF,MAEO,IAAIhH,QAAQ5D,OAAR,KAAoB,GAAxB,EAA6B;cAC5BkL,iBAAiBN,SAAS9E,IAAT,EAAvB;cACMqF,uBAAuBtE,WAAWqE,cAAX,CAA7B;;cAEIC,uBAAuB,EAAvB,IAA6BJ,UAAU,IAA3C,EAAiD;mBACxCJ,YAAYlK,MAAZ,CAAmBmK,QAAnB,CAAP;WADF,MAEO,IAAIO,wBAAwB,EAAxB,IAA8BJ,YAAY,CAA1C,IACDT,eAAeY,cAAf,CADH,EACmC;mBACjCP,YAAYlK,MAAZ,CAAmBmK,QAAnB,CAAP;;;;;;WAMD,IAAP;GAnDF;;SAsDOD,WAAP;;;ACxEF;;AAEA,AAAe,SAASS,gBAAT,CAA0B/N,CAA1B,EAA6B;MACtCmN,mBAAJ;MACIC,WAAW,CAAf;;IAEE,SAAF,EAAajN,IAAb,CAAkB,UAACiB,KAAD,EAAQf,IAAR,EAAiB;;QAE7BqE,4BAA0BnF,IAA1B,CAA+Bc,KAAKsC,OAApC,CAAJ,EAAkD;;;;QAI5CrC,QAAQN,EAAEK,IAAF,CAAd;QACM4I,QAAQC,SAAS5I,KAAT,CAAd;;QAEI2I,QAAQmE,QAAZ,EAAsB;iBACTnE,KAAX;mBACa3I,KAAb;;GAXJ;;;;MAiBI,CAAC6M,UAAL,EAAiB;WACRnN,EAAE,MAAF,KAAaA,EAAE,GAAF,EAAOgO,KAAP,EAApB;;;eAGWd,cAAcC,UAAd,EAA0BC,QAA1B,EAAoCpN,CAApC,CAAb;;SAEOmN,UAAP;;;ACtBF,SAASc,mBAAT,CAA6B3N,KAA7B,EAAoCN,CAApC,EAAuCkO,MAAvC,EAA+C;;;;;MAKzC5N,MAAM6N,QAAN,CAAe,qBAAf,CAAJ,EAA2
C;;;;MAIrCjM,UAAU8I,gBAAgB1K,MAAMmI,IAAN,EAAhB,CAAhB;;MAEIW,YAAYlH,OAAZ,IAAuB,EAA3B,EAA+B;QACvBkM,SAASpO,EAAE,GAAF,EAAOM,KAAP,EAAckC,MAA7B;QACM6L,aAAarO,EAAE,OAAF,EAAWM,KAAX,EAAkBkC,MAArC;;;QAGI6L,aAAcD,SAAS,CAA3B,EAA+B;YACvBzM,MAAN;;;;QAIIrC,gBAAgB4C,QAAQM,MAA9B;QACM8L,WAAWtO,EAAE,KAAF,EAASM,KAAT,EAAgBkC,MAAjC;;;;QAIIlD,gBAAgB,EAAhB,IAAsBgP,aAAa,CAAvC,EAA0C;YAClC3M,MAAN;;;;QAII+L,UAAUC,YAAYrN,KAAZ,CAAhB;;;;;QAKI4N,SAAS,EAAT,IAAeR,UAAU,GAAzB,IAAgCpO,gBAAgB,EAApD,EAAwD;YAChDqC,MAAN;;;;;;QAMEuM,UAAU,EAAV,IAAgBR,UAAU,GAA9B,EAAmC;;;;UAI3B/K,UAAUrC,MAAMhC,GAAN,CAAU,CAAV,EAAaqE,OAA7B;UACM4L,aAAa5L,YAAY,IAAZ,IAAoBA,YAAY,IAAnD;UACI4L,UAAJ,EAAgB;YACRC,eAAelO,MAAMmO,IAAN,EAArB;YACID,gBAAgBxD,gBAAgBwD,aAAa/F,IAAb,EAAhB,EAAqCsB,KAArC,CAA2C,CAAC,CAA5C,MAAmD,GAAvE,EAA4E;;;;;YAKxEpI,MAAN;;;;QAII+M,cAAc1O,EAAE,QAAF,EAAYM,KAAZ,EAAmBkC,MAAvC;;;QAGIkM,cAAc,CAAd,IAAmBpP,gBAAgB,GAAvC,EAA4C;YACpCqC,MAAN;;;;;;;;;;;;;AAaN,AAAe,SAASgN,SAAT,CAAmB7G,QAAnB,EAA6B9H,CAA7B,EAAgC;IAC3CmE,wBAAF,EAA4B2D,QAA5B,EAAsC3H,IAAtC,CAA2C,UAACiB,KAAD,EAAQf,IAAR,EAAiB;QACpDC,QAAQN,EAAEK,IAAF,CAAd;QACI6N,SAAShF,SAAS5I,KAAT,CAAb;QACI,CAAC4N,MAAL,EAAa;eACF/D,eAAe7J,KAAf,EAAsBN,CAAtB,CAAT;eACSM,KAAT,EAAgBN,CAAhB,EAAmBkO,MAAnB;;;;QAIEA,SAAS,CAAb,EAAgB;YACRvM,MAAN;KADF,MAEO;;0BAEerB,KAApB,EAA2BN,CAA3B,EAA8BkO,MAA9B;;GAbJ;;SAiBOlO,CAAP;;;ACrGa,SAAS4O,YAAT,CAAsB9G,QAAtB,EAAgC9H,CAAhC,EAA+C;MAAZ6O,KAAY,yDAAJ,EAAI;;IAC1DxK,eAAF,EAAmByD,QAAnB,EAA6B3H,IAA7B,CAAkC,UAACiB,KAAD,EAAQ0N,MAAR,EAAmB;QAC7CC,UAAU/O,EAAE8O,MAAF,CAAhB;;;;;QAKI9O,EAAE+O,OAAF,EAAWjH,QAAX,EAAqBkH,OAArB,CAA6B,GAA7B,EAAkCxM,MAAlC,KAA6C,CAAjD,EAAoD;aAC3CuM,QAAQpN,MAAR,EAAP;;;;QAIEqJ,gBAAgBhL,EAAE8O,MAAF,EAAUrG,IAAV,EAAhB,MAAsCoG,KAA1C,EAAiD;aACxCE,QAAQpN,MAAR,EAAP;;;;;QAKEqH,UAAUhJ,EAAE8O,MAAF,CAAV,IAAuB,CAA3B,EAA8B;aACrBC,QAAQpN,MAAR,EAAP;;;WAGKoN,OAAP;GArBF;;SAwBO/O,CAAP;;;AC5BF;;;AAEA,AAAe,SAASiP,eAAT,CAAyBjH,OAAzB,EAAkChI,CAAlC,EAAqC;;;;MAI9CmH,cAAcnH,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CAAJ;MACImH,cAAcnH,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CA
AJ;;SAEOA,CAAP;;;ACTF,SAASkP,UAAT,CAAoBlP,CAApB,EAAuBmP,OAAvB,EAAgC3O,IAAhC,EAAsC4O,QAAtC,EAAgD;UACxC5O,IAAN,QAAe4O,QAAf,EAAyBjP,IAAzB,CAA8B,UAACC,CAAD,EAAIC,IAAJ,EAAa;QACnCb,MAAMa,KAAKY,OAAL,CAAaT,IAAb,CAAZ;QACM6O,cAAc3P,IAAIjB,OAAJ,CAAY0Q,OAAZ,EAAqB3P,GAArB,CAApB;;SAEKyB,OAAL,CAAaT,IAAb,IAAqB6O,WAArB;GAJF;;;AAQF,AAAe,SAASC,iBAAT,CAA2BF,QAA3B,EAAqCpP,CAArC,EAAwCR,GAAxC,EAA6C;GACzD,MAAD,EAAS,KAAT,EAAgB0B,OAAhB,CAAwB;WAAQgO,WAAWlP,CAAX,EAAcR,GAAd,EAAmBgB,IAAnB,EAAyB4O,QAAzB,CAAR;GAAxB;;SAEOA,QAAP;;;ACbK,SAAS5F,UAAT,CAAoBf,IAApB,EAA0B;SACxBA,KAAKC,IAAL,GACKuC,OADL,CACa,MADb,EACqB,GADrB,EAEKzI,MAFZ;;;;;;AAQF,AAAO,SAASmL,WAAT,CAAqBrN,KAArB,EAA4B;MAC3BiP,kBAAkB/F,WAAWlJ,MAAMmI,IAAN,EAAX,CAAxB;;MAEM+G,WAAWlP,MAAMkB,IAAN,CAAW,GAAX,EAAgBiH,IAAhB,EAAjB;MACMgH,aAAajG,WAAWgG,QAAX,CAAnB;;MAEID,kBAAkB,CAAtB,EAAyB;WAChBE,aAAaF,eAApB;GADF,MAEO,IAAIA,oBAAoB,CAApB,IAAyBE,aAAa,CAA1C,EAA6C;WAC3C,CAAP;;;SAGK,CAAP;;;ACpBF;;;AAEA,AAAe,SAASC,eAAT,CACb1P,CADa,EAEb2P,SAFa,EAGbC,WAHa,EAKb;MADAjB,SACA,yDADY,IACZ;;MACMkB,aAAaF,UAAUjO,MAAV,CAAiB;WAAQkO,YAAYE,OAAZ,CAAoBC,IAApB,MAA8B,CAAC,CAAvC;GAAjB,CAAnB;;;;;;;;UAEWA,IAHX;;UAIQ1O,OAAO,MAAb;UACMd,QAAQ,OAAd;;UAEMyP,QAAQhQ,YAAUqB,IAAV,UAAmB0O,IAAnB,QAAd;;;;;UAKME,SACJD,MAAMrM,GAAN,CAAU,UAACvC,KAAD,EAAQf,IAAR;eAAiBL,EAAEK,IAAF,EAAQG,IAAR,CAAaD,KAAb,CAAjB;OAAV,EACM2P,OADN,GAEMxO,MAFN,CAEa;eAAQ+G,SAAS,EAAjB;OAFb,CADF;;;;;;UASIwH,OAAOzN,MAAP,KAAkB,CAAtB,EAAyB;YACnB2N,kBAAJ;;;YAGIxB,SAAJ,EAAe;sBACDyB,UAAUH,OAAO,CAAP,CAAV,EAAqBjQ,CAArB,CAAZ;SADF,MAEO;sBACOiQ,OAAO,CAAP,CAAZ;;;;aAGKE;;;;;yBA5BQN,UAAnB,8HAA+B;;;;;;;;;;;;;;;;;;;;;;SAiCxB,IAAP;;;AC3CF,SAASQ,UAAT,CAAoB/P,KAApB,EAA2BgQ,WAA3B,EAAwC;;;MAGlChQ,MAAMiC,QAAN,GAAiBC,MAAjB,GAA0B8N,WAA9B,EAA2C;WAClC,KAAP;;;MAGEC,cAAcjQ,KAAd,CAAJ,EAA0B;WACjB,KAAP;;;SAGK,IAAP;;;;;;AAMF,AAAe,SAASkQ,oBAAT,CACbxQ,CADa,EAEbyQ,SAFa,EAKb;MAFAH,WAEA,yDAFc,CAEd;MADAI,QACA,yDADW,IACX;;;;;;yBACuBD,SAAvB,8HAAkC;UAAvB7M,QAAuB;;UAC1BoM,QAAQhQ,EAAE4D,QAAF,CAAd;;;;UAIIoM,MAAMxN,MAAN,KAAiB,CAArB,EAAwB;YAChBlC,QAAQN,EAAEgQ,MAAM,CAAN,CAAF
,CAAd;;YAEIK,WAAW/P,KAAX,EAAkBgQ,WAAlB,CAAJ,EAAoC;cAC9BpO,gBAAJ;cACIwO,QAAJ,EAAc;sBACFpQ,MAAMmI,IAAN,EAAV;WADF,MAEO;sBACKnI,MAAMqQ,IAAN,EAAV;;;cAGEzO,OAAJ,EAAa;mBACJA,OAAP;;;;;;;;;;;;;;;;;;;;SAMD,IAAP;;;AChDF;AACA,AAAe,SAASkO,SAAT,CAAmB3H,IAAnB,EAAyBzI,CAAzB,EAA4B;;;MAGnC4Q,YAAY5Q,aAAWyI,IAAX,cAA0BA,IAA1B,EAAlB;SACOmI,cAAc,EAAd,GAAmBnI,IAAnB,GAA0BmI,SAAjC;;;ACLa,SAASL,aAAT,CAAuBjQ,KAAvB,EAA8B;MACrCyC,UAAUzC,MAAMyC,OAAN,GAAgBmN,OAAhB,EAAhB;MACMW,gBAAgB9N,QAAQvB,IAAR,CAAa,UAAC8I,MAAD,EAAY;QACvCvE,aAAgBuE,OAAOrJ,OAAP,CAAe6P,KAA/B,SAAwCxG,OAAOrJ,OAAP,CAAe6E,EAA7D;WACOC,WAAW5D,QAAX,CAAoB,SAApB,CAAP;GAFoB,CAAtB;;SAKO0O,kBAAkBE,SAAzB;;;ACPF;;;;AAIA,AAAe,SAASC,gBAAT,CAA0B1Q,KAA1B,EAAiC;SACvCA,MAAMmI,IAAN,GAAaC,IAAb,GAAoBlG,MAApB,IAA8B,GAArC;;;ACHa,SAASyO,WAAT,CAAqBjR,CAArB,EAAwB;SAC9BA,EAAEiF,cAAF,EAAkBzC,MAAlB,GAA2B,CAAlC;;;ACHF;AACA,AAAO,IAAM0O,kBAAkB,wCAAxB;;;;AAIP,AAAO,IAAMC,eAAe,IAAIhT,MAAJ,CAAW,aAAX,EAA0B,GAA1B,CAArB;AACP,AAYA,AASA;AACA,AAAO,IAAMiT,iBAAiB,WAAvB;AACP,AAAO,IAAMC,kBAAkB,WAAxB;AACP,AAAO,IAAMC,uBAAuB,4BAA7B;AACP,AAAO,IAAMC,yBAAyB,oBAA/B;AACP,AAAO,IAAMC,wBAAwB,QAA9B;AACP,IAAMC,SAAS,CACb,KADa,EAEb,KAFa,EAGb,KAHa,EAIb,KAJa,EAKb,KALa,EAMb,KANa,EAOb,KAPa,EAQb,KARa,EASb,KATa,EAUb,KAVa,EAWb,KAXa,EAYb,KAZa,CAAf;AAcA,IAAMC,YAAYD,OAAOrT,IAAP,CAAY,GAAZ,CAAlB;AACA,IAAMuT,aAAa,qCAAnB;AACA,IAAMC,aAAa,wCAAnB;AACA,AAAO,IAAMC,oBACX,IAAI1T,MAAJ,OAAewT,UAAf,WAA+BC,UAA/B,wBAA4DF,SAA5D,QAA0E,IAA1E,CADK;;;;;AAMP,AAAO,IAAMI,qBAAqB,gBAA3B;;AAEP,AAAO,IAAMC,oBACX,IAAI5T,MAAJ,CAAW,2BAAX,EAAwC,GAAxC,CADK;;ACxDP;;AAEA,AAAe,SAAS6T,WAAT,CAAqBC,MAArB,EAA6B;SACnCA,OAAOhH,OAAP,CAAeiG,eAAf,EAAgC,IAAhC,EAAsCxI,IAAtC,EAAP;;;ACHa,SAAS9G,OAAT,CAAesQ,YAAf,EAA6B;iBAC3BA,aAAaxJ,IAAb,EAAf;MACIyJ,SAASC,QAAT,CAAkBF,YAAlB,CAAJ,EAAqC;WAC5BA,YAAP;;;SAGK,IAAP;;;ACJF;;AAEA,AAAe,SAASG,QAAT,CAAkBC,GAAlB,QAA8B;MAALtS,CAAK,QAALA,CAAK;;;MAEvCsS,IAAI9P,MAAJ,GAAa,IAAb,IAAqB8P,IAAI9P,MAAJ,GAAa,CAAtC,EAAyC,OAAO,IAAP;;MAEnC+P,UAAUnC,UAAUkC,GAAV,EAAetS,CAAf,CAAhB;;;;MAIImR,aAAa5R,IAAb,CAAkBgT,OAAlB,CAAJ,EAAgC,OAAO,I
AAP;;SAEzBA,QAAQ7J,IAAR,EAAP;;;ACfF;;;;AAIA,AASA,AAAO,SAAS8J,eAAT,CAAyBC,UAAzB,EAAqC;SACnC,CAACA,WAAWpJ,KAAX,CAAiBwI,iBAAjB,KAAuC,EAAxC,EACWzT,IADX,CACgB,GADhB,EAEW6M,OAFX,CAEmBuG,qBAFnB,EAE0C,GAF1C,EAGWvG,OAHX,CAGmBsG,sBAHnB,EAG2C,UAH3C,EAIWtG,OAJX,CAImBqG,oBAJnB,EAIyC,IAJzC,EAKW5I,IALX,EAAP;;;;;AAUF,AAAe,SAASgK,kBAAT,CAA4BD,UAA5B,EAAwC;;MAEjDrB,eAAe7R,IAAf,CAAoBkT,UAApB,KAAmCpB,gBAAgB9R,IAAhB,CAAqBkT,UAArB,CAAvC,EAAyE;iBAC1D/K,SAAS+K,UAAT,EAAqB,EAArB,CAAb;;;MAGEE,OAAOC,OAAO,IAAIC,IAAJ,CAASJ,UAAT,CAAP,CAAX;;MAEI,CAACE,KAAKG,OAAL,EAAL,EAAqB;iBACNN,gBAAgBC,UAAhB,CAAb;WACOG,OAAO,IAAIC,IAAJ,CAASJ,UAAT,CAAP,CAAP;;;SAGKE,KAAKG,OAAL,KAAiBH,KAAKI,WAAL,EAAjB,GAAsC,IAA7C;;;AC1BF;;AACA,AAAe,SAASC,gBAAT,CACbhL,OADa,QAQb;MALEhI,CAKF,QALEA,CAKF;mCAJEiT,kBAIF;MAJEA,kBAIF,yCAJuB,IAIvB;wBAHEpE,KAGF;MAHEA,KAGF,8BAHU,EAGV;sBAFErP,GAEF;MAFEA,GAEF,4BAFQ,EAER;;;;kBAGgBwI,OAAhB,EAAyBhI,CAAzB;;;cAGYgI,OAAZ,EAAqBhI,CAArB;;;;gBAIcgI,OAAd,EAAuBhI,CAAvB;;;;;aAKWgI,OAAX,EAAoBhI,CAApB;;;eAGagI,OAAb,EAAsBhI,CAAtB,EAAyB6O,KAAzB;;;oBAGkB7G,OAAlB,EAA2BhI,CAA3B,EAA8BR,GAA9B;;;kBAGgBwI,OAAhB;;;;;YAKUA,OAAV,EAAmBhI,CAAnB,EAAsBiT,kBAAtB;;;cAGYjL,OAAZ,EAAqBhI,CAArB;;SAEOgI,OAAP;;;AClDa,SAASkL,UAAT,CAAoBrE,KAApB,QAAuC;MAAVrP,GAAU,QAAVA,GAAU;MAALQ,CAAK,QAALA,CAAK;;;;MAGhD8R,mBAAmBvS,IAAnB,CAAwBsP,KAAxB,CAAJ,EAAoC;YAC1BsE,kBAAkBtE,KAAlB,EAAyBrP,GAAzB,CAAR;;;;;MAKEqP,MAAMrM,MAAN,GAAe,GAAnB,EAAwB;;QAEhB4Q,KAAKpT,EAAE,IAAF,CAAX;QACIoT,GAAG5Q,MAAH,KAAc,CAAlB,EAAqB;cACX4Q,GAAG3K,IAAH,EAAR;;;;;SAKG2H,UAAUvB,KAAV,EAAiB7O,CAAjB,EAAoB0I,IAApB,EAAP;;;ACdF,SAAS2K,sBAAT,CAAgCC,UAAhC,EAA4C7K,IAA5C,EAAkD;;;;MAI5C6K,WAAW9Q,MAAX,IAAqB,CAAzB,EAA4B;;;;;UAIpB+Q,aAAaD,WAAWlL,MAAX,CAAkB,UAACC,GAAD,EAAMmL,SAAN,EAAoB;YACnDA,SAAJ,IAAiBnL,IAAImL,SAAJ,IAAiBnL,IAAImL,SAAJ,IAAiB,CAAlC,GAAsC,CAAvD;eACOnL,GAAP;OAFiB,EAGhB,EAHgB,CAAnB;;kCAMEhB,QAAQrG,OAAR,CAAgBuS,UAAhB,EACQnL,MADR,CACe,UAACC,GAAD,EAAMf,GAAN,EAAc;YAChBe,IAAI,CAAJ,IAASkL,WAAWjM,GAAX,CAAb,EAA8B;iBACrB,CAACA,GAAD,EAAMiM,WAAWjM,GAAX,CAAN,CAAP;;;eAGKe,GAAP;OANT,EAOU,CAAC,CAAD
,EAAI,CAAJ,CAPV,CAVwB;;;;UASnBoL,OATmB;UASVC,SATU;;;;;;;UAuBtBA,aAAa,CAAb,IAAkBD,QAAQjR,MAAR,IAAkB,CAAxC,EAA2C;qBAC5BiG,KAAKsD,KAAL,CAAW0H,OAAX,CAAb;;;UAGIE,YAAY,CAACL,WAAW,CAAX,CAAD,EAAgBA,WAAWvJ,KAAX,CAAiB,CAAC,CAAlB,CAAhB,CAAlB;UACM6J,aAAaD,UAAUvL,MAAV,CAAiB,UAACC,GAAD,EAAM1K,GAAN;eAAc0K,IAAI7F,MAAJ,GAAa7E,IAAI6E,MAAjB,GAA0B6F,GAA1B,GAAgC1K,GAA9C;OAAjB,EAAoE,EAApE,CAAnB;;UAEIiW,WAAWpR,MAAX,GAAoB,EAAxB,EAA4B;;aACnBoR;;;;;WAGFnL;;;;;;;SAGF,IAAP;;;AAGF,SAASoL,oBAAT,CAA8BP,UAA9B,EAA0C9T,GAA1C,EAA+C;;;;;;;mBAO5BE,IAAIC,KAAJ,CAAUH,GAAV,CAP4B;;MAOrCgN,IAPqC,cAOrCA,IAPqC;;MAQvCsH,cAActH,KAAKvB,OAAL,CAAa8G,iBAAb,EAAgC,EAAhC,CAApB;;MAEMgC,YAAYT,WAAW,CAAX,EAAclH,WAAd,GAA4BnB,OAA5B,CAAoC,GAApC,EAAyC,EAAzC,CAAlB;MACM+I,iBAAiBC,MAAMC,WAAN,CAAkBH,SAAlB,EAA6BD,WAA7B,CAAvB;;MAEIE,iBAAiB,GAAjB,IAAwBD,UAAUvR,MAAV,GAAmB,CAA/C,EAAkD;WACzC8Q,WAAWvJ,KAAX,CAAiB,CAAjB,EAAoB3L,IAApB,CAAyB,EAAzB,CAAP;;;MAGI+V,UAAUb,WAAWvJ,KAAX,CAAiB,CAAC,CAAlB,EAAqB,CAArB,EAAwBqC,WAAxB,GAAsCnB,OAAtC,CAA8C,GAA9C,EAAmD,EAAnD,CAAhB;MACMmJ,eAAeH,MAAMC,WAAN,CAAkBC,OAAlB,EAA2BL,WAA3B,CAArB;;MAEIM,eAAe,GAAf,IAAsBD,QAAQ3R,MAAR,IAAkB,CAA5C,EAA+C;WACtC8Q,WAAWvJ,KAAX,CAAiB,CAAjB,EAAoB,CAAC,CAArB,EAAwB3L,IAAxB,CAA6B,EAA7B,CAAP;;;SAGK,IAAP;;;;;AAKF,AAAe,SAAS+U,iBAAT,CAA2BtE,KAA3B,EAA4C;MAAVrP,GAAU,yDAAJ,EAAI;;;;MAGnD8T,aAAazE,MAAM9C,KAAN,CAAY+F,kBAAZ,CAAnB;MACIwB,WAAW9Q,MAAX,KAAsB,CAA1B,EAA6B;WACpBqM,KAAP;;;MAGEwF,WAAWhB,uBAAuBC,UAAvB,EAAmCzE,KAAnC,CAAf;MACIwF,QAAJ,EAAc,OAAOA,QAAP;;aAEHR,qBAAqBP,UAArB,EAAiC9T,GAAjC,CAAX;MACI6U,QAAJ,EAAc,OAAOA,QAAP;;;;SAIPxF,KAAP;;;AC3FF,IAAMyF,WAAW;UACPtC,WADO;kBAECuC,OAFD;OAGVlC,QAHU;kBAICK,kBAJD;WAKN8B,gBALM;SAMRtB;CANT,CAUA,AAEA,AACA,AACA,AACA,AACA,AACA,AACA;;ACfA;;;;;;;;;;;AAWA,AAAe,SAASuB,eAAT,CAAyBzU,CAAzB,EAA4B0U,IAA5B,EAAkC;;;;;;;MAO3CA,KAAK/O,uBAAT,EAAkC;QAC5BA,wBAAwB3F,CAAxB,CAAJ;;;MAGEkH,oBAAoBlH,CAApB,CAAJ;MACI4K,aAAa5K,CAAb,EAAgB0U,KAAKnK,WAArB,CAAJ;MACMoK,gBAAgB5G,iBAAiB/N,CAAjB,CAAtB;;SAEO2U,aAAP;;;AC3BF,IAAMC,0BAA0B;eACjB;6BACc,IADd;iBAEE,IAFF;wBAGS;GAJQ;;;;;;;;;;;;;;;;
;;;;;SAAA,yBA0BGF,IA1BH,EA0BS;QAA7B1U,CAA6B,QAA7BA,CAA6B;QAA1B2Q,IAA0B,QAA1BA,IAA0B;QAApB9B,KAAoB,QAApBA,KAAoB;QAAbrP,GAAa,QAAbA,GAAa;;wBACzB,KAAKqV,WAAjB,EAAiCH,IAAjC;;QAEI1U,KAAKoC,QAAQC,IAAR,CAAasO,IAAb,CAAT;;;;QAIItQ,OAAO,KAAKyU,cAAL,CAAoB9U,CAApB,EAAuB6O,KAAvB,EAA8BrP,GAA9B,EAAmCkV,IAAnC,CAAX;;QAEI1D,iBAAiB3Q,IAAjB,CAAJ,EAA4B;aACnB,KAAK0U,kBAAL,CAAwB1U,IAAxB,EAA8BL,CAA9B,CAAP;;;;;;;;;;2BAKgBqH,QAAQrG,OAAR,CAAgB0T,IAAhB,EAAsBhT,MAAtB,CAA6B;eAAKgT,KAAKM,CAAL,MAAY,IAAjB;OAA7B,CAAlB,8HAAuE;YAA5D1N,GAA4D;;aAChEA,GAAL,IAAY,KAAZ;YACIlF,QAAQC,IAAR,CAAasO,IAAb,CAAJ;;eAEO,KAAKmE,cAAL,CAAoB9U,CAApB,EAAuB6O,KAAvB,EAA8BrP,GAA9B,EAAmCkV,IAAnC,CAAP;;YAEI1D,iBAAiB3Q,IAAjB,CAAJ,EAA4B;;;;;;;;;;;;;;;;;;;WAKvB,KAAK0U,kBAAL,CAAwB1U,IAAxB,EAA8BL,CAA9B,CAAP;GApD4B;;;;gBAAA,0BAwDfA,CAxDe,EAwDZ6O,KAxDY,EAwDLrP,GAxDK,EAwDAkV,IAxDA,EAwDM;WAC3BF,iBACGC,gBAAgBzU,CAAhB,EAAmB0U,IAAnB,CADH,EAEL;UAAA;0BAEsBA,KAAKzB,kBAF3B;kBAAA;;KAFK,CAAP;GAzD4B;;;;;;oBAAA,8BAsEX5S,IAtEW,EAsELL,CAtEK,EAsEF;QACtB,CAACK,IAAL,EAAW;aACF,IAAP;;;WAGK2K,gBAAgBhL,EAAE2Q,IAAF,CAAOtQ,IAAP,CAAhB,CAAP;;;;;;;CA3EJ,CAqFA;;AC9FA;;;;;;;AAOA,AAAO,IAAM4U,yBAAyB,CACpC,iBADoC,EAEpC,UAFoC,EAGpC,SAHoC,EAIpC,UAJoC,EAKpC,OALoC,CAA/B;;;;AAUP,AAAO,IAAMC,uBAAuB,CAClC,UADkC,CAA7B;;;;;;;;;AAWP,AAAO,IAAMC,yBAAyB,CACpC,sBADoC,EAEpC,kBAFoC,EAGpC,kBAHoC,EAIpC,YAJoC,EAKpC,mBALoC,EAMpC,cANoC,CAA/B;;AASP,AAAO,IAAMC,uBAAuB,CAClC,YADkC,EAElC,cAFkC,EAGlC,cAHkC,EAIlC,aAJkC,EAKlC,aALkC,EAMlC,aANkC,EAOlC,aAPkC,EAQlC,eARkC,EASlC,eATkC,EAUlC,iBAVkC,EAWlC,UAXkC,EAYlC,YAZkC,EAalC,IAbkC,EAclC,iBAdkC,EAelC,OAfkC,CAA7B;;ACxBP,IAAMC,wBAAwB;SAAA,yBACG;QAArBrV,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAb8V,SAAa,QAAbA,SAAa;;;;QAGzBzG,cAAJ;;YAEQa,gBAAgB1P,CAAhB,EAAmBiV,sBAAnB,EAA2CK,SAA3C,CAAR;QACIzG,KAAJ,EAAW,OAAOqE,WAAWrE,KAAX,EAAkB,EAAErP,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;;YAIHwQ,qBAAqBxQ,CAArB,EAAwBmV,sBAAxB,CAAR;QACItG,KAAJ,EAAW,OAAOqE,WAAWrE,KAAX,EAAkB,EAAErP,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;YAGH0P,gBAAgB1P,CAAhB,EAAmBkV,oBAAnB,EAAyCI,SAAzC,CAAR;QACIzG,KAAJ,EAA
W,OAAOqE,WAAWrE,KAAX,EAAkB,EAAErP,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;YAGHwQ,qBAAqBxQ,CAArB,EAAwBoV,oBAAxB,CAAR;QACIvG,KAAJ,EAAW,OAAOqE,WAAWrE,KAAX,EAAkB,EAAErP,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;WAGJ,EAAP;;CAvBJ,CA2BA;;ACxCA;;;;;;AAMA,AAAO,IAAMuV,mBAAmB,CAC9B,KAD8B,EAE9B,OAF8B,EAG9B,WAH8B,EAI9B,eAJ8B,EAK9B,YAL8B,EAM9B,WAN8B,EAO9B,SAP8B,CAAzB;;AAUP,AAAO,IAAMC,oBAAoB,GAA1B;;;;;;;;;AASP,AAAO,IAAMC,mBAAmB,CAC9B,sBAD8B,EAE9B,mBAF8B,EAG9B,oBAH8B,EAI9B,mBAJ8B,EAK9B,oBAL8B,EAM9B,qBAN8B,EAO9B,aAP8B,EAQ9B,iBAR8B,EAS9B,oBAT8B,EAU9B,qBAV8B,EAW9B,eAX8B,EAY9B,YAZ8B,EAa9B,YAb8B,EAc9B,cAd8B,EAe9B,cAf8B,EAgB9B,yBAhB8B,EAiB9B,qBAjB8B,EAkB9B,qBAlB8B,EAmB9B,SAnB8B,EAoB9B,SApB8B,EAqB9B,gBArB8B,EAsB9B,gBAtB8B,EAuB9B,SAvB8B,CAAzB;;;;AA4BP,IAAMC,WAAW,aAAjB;AACA,AAAO,IAAMC,sBAAsB,CACjC,CAAC,SAAD,EAAYD,QAAZ,CADiC,EAEjC,CAAC,SAAD,EAAYA,QAAZ,CAFiC,CAA5B;;ACzCP,IAAME,yBAAyB;SAAA,yBACH;QAAhB5V,CAAgB,QAAhBA,CAAgB;QAAbsV,SAAa,QAAbA,SAAa;;QACpBrD,eAAJ;;;;aAISvC,gBAAgB1P,CAAhB,EAAmBuV,gBAAnB,EAAqCD,SAArC,CAAT;QACIrD,UAAUA,OAAOzP,MAAP,GAAgBgT,iBAA9B,EAAiD;aACxCxD,YAAYC,MAAZ,CAAP;;;;aAIOzB,qBAAqBxQ,CAArB,EAAwByV,gBAAxB,EAA0C,CAA1C,CAAT;QACIxD,UAAUA,OAAOzP,MAAP,GAAgBgT,iBAA9B,EAAiD;aACxCxD,YAAYC,MAAZ,CAAP;;;;;;;;;;2BAK8B0D,mBAAhC,8HAAqD;;;;;YAAzC/R,QAAyC;YAA/BiS,KAA+B;;YAC7CxV,OAAOL,EAAE4D,QAAF,CAAb;YACIvD,KAAKmC,MAAL,KAAgB,CAApB,EAAuB;cACfiG,OAAOpI,KAAKoI,IAAL,EAAb;cACIoN,MAAMtW,IAAN,CAAWkJ,IAAX,CAAJ,EAAsB;mBACbuJ,YAAYvJ,IAAZ,CAAP;;;;;;;;;;;;;;;;;;;WAKC,IAAP;;CA7BJ,CAiCA;;AC9CA;;;;AAIA,AAAO,IAAMqN,2BAA2B,CACtC,wBADsC,EAEtC,aAFsC,EAGtC,SAHsC,EAItC,gBAJsC,EAKtC,WALsC,EAMtC,cANsC,EAOtC,UAPsC,EAQtC,UARsC,EAStC,SATsC,EAUtC,eAVsC,EAWtC,UAXsC,EAYtC,cAZsC,EAatC,qBAbsC,EActC,cAdsC,EAetC,SAfsC,EAgBtC,MAhBsC,CAAjC;;;;;AAsBP,AAAO,IAAMC,2BAA2B,CACtC,4BADsC,EAEtC,oBAFsC,EAGtC,0BAHsC,EAItC,kBAJsC,EAKtC,oBALsC,EAMtC,kBANsC,EAOtC,iBAPsC,EAQtC,aARsC,EAStC,eATsC,EAUtC,qBAVsC,EAWtC,mBAXsC,EAYtC,cAZsC,EAatC,aAbsC,EActC,YAdsC,EAetC,kBAfsC,EAgBtC,WAhBsC,EAiBtC,UAjBsC,CAAjC;;;;;AAuBP,IAAMC,kBAAkB,mDAAxB;AACA,AAAO,IAAMC,yBAAyB;;AAEpC,IAA
I9X,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAFoC;;;;AAMpC,IAAIA,MAAJ,CAAW,6BAAX,EAA0C,GAA1C,CANoC;;AAQpC,IAAIA,MAAJ,iBAAyB6X,eAAzB,kBAAuD,GAAvD,CARoC,CAA/B;;ACrCP,IAAME,gCAAgC;SAAA,yBACL;QAArBlW,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAb8V,SAAa,QAAbA,SAAa;;QACzBa,sBAAJ;;;;oBAIgBzG,gBAAgB1P,CAAhB,EAAmB8V,wBAAnB,EAA6CR,SAA7C,EAAwD,KAAxD,CAAhB;QACIa,aAAJ,EAAmB,OAAOzD,mBAAmByD,aAAnB,CAAP;;;;oBAIH3F,qBAAqBxQ,CAArB,EAAwB+V,wBAAxB,CAAhB;QACII,aAAJ,EAAmB,OAAOzD,mBAAmByD,aAAnB,CAAP;;;oBAGHjL,eAAe1L,GAAf,EAAoByW,sBAApB,CAAhB;QACIE,aAAJ,EAAmB,OAAOzD,mBAAmByD,aAAnB,CAAP;;WAEZ,IAAP;;CAlBJ,CAsBA;;ACnCA;;;;;;;;;;;;;;;;;AAiBA,IAAMC,sBAAsB;;SAAA,qBAEhB;WACD,IAAP;;CAHJ,CAOA;;ACxBA;;;AAGA,AAAO,IAAMC,2BAA2B,CACtC,UADsC,EAEtC,eAFsC,EAGtC,WAHsC,CAAjC;;AAMP,AAAO,IAAMC,2BAA2B,CACtC,qBADsC,CAAjC;;AAIP,AAAO,IAAMC,gCAAgC,CAC3C,QAD2C,EAE3C,YAF2C,EAG3C,OAH2C,EAI3C,OAJ2C,EAK3C,UAL2C,CAAtC;AAOP,AAAO,IAAMC,mCAAmC,IAAIrY,MAAJ,CAAWoY,8BAA8BnY,IAA9B,CAAmC,GAAnC,CAAX,EAAoD,GAApD,CAAzC;;AAEP,AAAO,IAAMqY,gCAAgC,CAC3C,QAD2C,EAE3C,QAF2C,EAG3C,OAH2C,EAI3C,UAJ2C,EAK3C,UAL2C,EAM3C,MAN2C,EAO3C,IAP2C,EAQ3C,YAR2C,EAS3C,MAT2C,EAU3C,QAV2C,EAW3C,QAX2C,EAY3C,KAZ2C,EAa3C,QAb2C,EAc3C,SAd2C,EAe3C,QAf2C,EAgB3C,SAhB2C,EAiB3C,SAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C,EAoB3C,UApB2C,EAqB3C,SArB2C,EAsB3C,OAtB2C,EAuB3C,OAvB2C,EAwB3C,KAxB2C,EAyB3C,aAzB2C,CAAtC;AA2BP,AAAO,IAAMC,mCAAmC,IAAIvY,MAAJ,CAAWsY,8BAA8BrY,IAA9B,CAAmC,GAAnC,CAAX,EAAoD,GAApD,CAAzC;;AAEP,AAAO,IAAMuY,SAAS,gBAAf;AACP,AAAO,IAAMC,SAAS,kBAAf;;AC3CP,SAASC,MAAT,CAAgBvW,KAAhB,EAAuB;UACXA,MAAME,IAAN,CAAW,OAAX,KAAuB,EAAjC,WAAuCF,MAAME,IAAN,CAAW,IAAX,KAAoB,EAA3D;;;;AAIF,AAAO,SAASsW,aAAT,CAAuBtX,GAAvB,EAA4B;QAC3BA,IAAIkJ,IAAJ,EAAN;MACIO,QAAQ,CAAZ;;MAEIuN,iCAAiCjX,IAAjC,CAAsCC,GAAtC,CAAJ,EAAgD;aACrC,EAAT;;;MAGEkX,iCAAiCnX,IAAjC,CAAsCC,GAAtC,CAAJ,EAAgD;aACrC,EAAT;;;;;MAKEmX,OAAOpX,IAAP,CAAYC,GAAZ,CAAJ,EAAsB;aACX,EAAT;;;MAGEoX,OAAOrX,IAAP,CAAYC,GAAZ,CAAJ,EAAsB;aACX,EAAT;;;;;SAKKyJ,KAAP;;;;AAIF,AAAO,SAAS8N,SAAT,CAAmBvP,IAAnB,EAAyB;MAC1BA,KAAKhH,IAAL,CAAU,KAAV,CAAJ,EAAsB;WACb,CAAP;;;
SAGK,CAAP;;;;;AAKF,AAAO,SAASwW,cAAT,CAAwBxP,IAAxB,EAA8B;MAC/ByB,QAAQ,CAAZ;MACMgO,aAAazP,KAAKzE,OAAL,CAAa,QAAb,EAAuBiL,KAAvB,EAAnB;;MAEIiJ,WAAWzU,MAAX,KAAsB,CAA1B,EAA6B;aAClB,EAAT;;;MAGIM,UAAU0E,KAAK8C,MAAL,EAAhB;MACI4M,iBAAJ;MACIpU,QAAQN,MAAR,KAAmB,CAAvB,EAA0B;eACbM,QAAQwH,MAAR,EAAX;;;GAGDxH,OAAD,EAAUoU,QAAV,EAAoBhW,OAApB,CAA4B,UAACZ,KAAD,EAAW;QACjCsE,iBAAerF,IAAf,CAAoBsX,OAAOvW,KAAP,CAApB,CAAJ,EAAwC;eAC7B,EAAT;;GAFJ;;SAMO2I,KAAP;;;;;AAKF,AAAO,SAASkO,cAAT,CAAwB3P,IAAxB,EAA8B;MAC/ByB,QAAQ,CAAZ;MACMsE,WAAW/F,KAAKpB,IAAL,EAAjB;MACMG,UAAUgH,SAASjP,GAAT,CAAa,CAAb,CAAhB;;MAEIiI,WAAWA,QAAQ5D,OAAR,KAAoB,YAAnC,EAAiD;aACtC,EAAT;;;MAGEiC,iBAAerF,IAAf,CAAoBsX,OAAOtJ,QAAP,CAApB,CAAJ,EAA2C;aAChC,EAAT;;;SAGKtE,KAAP;;;AAGF,AAAO,SAASmO,iBAAT,CAA2B5P,IAA3B,EAAiC;MAClCyB,QAAQ,CAAZ;;MAEMtB,QAAQwB,WAAW3B,KAAKhH,IAAL,CAAU,OAAV,CAAX,CAAd;MACMiH,SAAS0B,WAAW3B,KAAKhH,IAAL,CAAU,QAAV,CAAX,CAAf;MACM6W,MAAM7P,KAAKhH,IAAL,CAAU,KAAV,CAAZ;;;MAGImH,SAASA,SAAS,EAAtB,EAA0B;aACf,EAAT;;;;MAIEF,UAAUA,UAAU,EAAxB,EAA4B;aACjB,EAAT;;;MAGEE,SAASF,MAAT,IAAmB,CAAC4P,IAAIlV,QAAJ,CAAa,QAAb,CAAxB,EAAgD;QACxCmV,OAAO3P,QAAQF,MAArB;QACI6P,OAAO,IAAX,EAAiB;;eACN,GAAT;KADF,MAEO;eACI3N,KAAK4N,KAAL,CAAWD,OAAO,IAAlB,CAAT;;;;SAIGrO,KAAP;;;AAGF,AAAO,SAASuO,eAAT,CAAyBC,KAAzB,EAAgCrW,KAAhC,EAAuC;SACpCqW,MAAMjV,MAAN,GAAe,CAAhB,GAAqBpB,KAA5B;;;ACxGF;;;;;;;;AAQA,IAAMsW,+BAA+B;SAAA,yBACA;QAAzB1X,CAAyB,QAAzBA,CAAyB;QAAtBkC,OAAsB,QAAtBA,OAAsB;QAAboT,SAAa,QAAbA,SAAa;;QAC7BqC,iBAAJ;;;;;;QAMMC,WACJlI,gBACE1P,CADF,EAEEqW,wBAFF,EAGEf,SAHF,EAIE,KAJF,CADF;;QAQIsC,QAAJ,EAAc;iBACDrD,QAAWqD,QAAX,CAAX;;UAEID,QAAJ,EAAc,OAAOA,QAAP;;;;;;QAMVE,OAAO7X,EAAE,KAAF,EAASkC,OAAT,EAAkBgO,OAAlB,EAAb;QACM4H,YAAY,EAAlB;;SAEK5W,OAAL,CAAa,UAACH,GAAD,EAAMK,KAAN,EAAgB;UACrBoG,OAAOxH,EAAEe,GAAF,CAAb;UACMsW,MAAM7P,KAAKhH,IAAL,CAAU,KAAV,CAAZ;;UAEI,CAAC6W,GAAL,EAAU;;UAENpO,QAAQ6N,cAAcO,GAAd,CAAZ;eACSN,UAAUvP,IAAV,CAAT;eACSwP,eAAexP,IAAf,CAAT;eACS2P,eAAe3P,IAAf,CAAT;eACS4P,kBAAkB5P,IAAlB,CAAT;eACSgQ,gBAAgBK,IAAhB,EAAsBzW,KAAtB,CAAT;;gBAEUiW,GAAV,IAAiBpO,KAAjB;KAbF;;
gCAiBE5B,QAAQrG,OAAR,CAAgB8W,SAAhB,EAA2B1P,MAA3B,CAAkC,UAACC,GAAD,EAAMf,GAAN;aAChCwQ,UAAUxQ,GAAV,IAAiBe,IAAI,CAAJ,CAAjB,GAA0B,CAACf,GAAD,EAAMwQ,UAAUxQ,GAAV,CAAN,CAA1B,GAAkDe,GADlB;KAAlC,EAEE,CAAC,IAAD,EAAO,CAAP,CAFF,CA5C+B;;;;QA2C1B0P,MA3C0B;QA2ClB3K,QA3CkB;;;QAgD7BA,WAAW,CAAf,EAAkB;iBACLmH,QAAWwD,MAAX,CAAX;;UAEIJ,QAAJ,EAAc,OAAOA,QAAP;;;;;;;;;;2BAKOrB,wBAAvB,8HAAiD;YAAtC1S,QAAsC;;YACzCtD,QAAQN,EAAE4D,QAAF,EAAYoK,KAAZ,EAAd;YACMqJ,MAAM/W,MAAME,IAAN,CAAW,KAAX,CAAZ;YACI6W,GAAJ,EAAS;qBACI9C,QAAW8C,GAAX,CAAX;cACIM,QAAJ,EAAc,OAAOA,QAAP;;;YAGVK,OAAO1X,MAAME,IAAN,CAAW,MAAX,CAAb;YACIwX,IAAJ,EAAU;qBACGzD,QAAWyD,IAAX,CAAX;cACIL,QAAJ,EAAc,OAAOA,QAAP;;;YAGVpX,QAAQD,MAAME,IAAN,CAAW,OAAX,CAAd;YACID,KAAJ,EAAW;qBACEgU,QAAWhU,KAAX,CAAX;cACIoX,QAAJ,EAAc,OAAOA,QAAP;;;;;;;;;;;;;;;;;;WAIX,IAAP;;CA9EJ;;AAkFA,AAEA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AC7Ge,SAASM,eAAT,CAAyBhP,KAAzB,EAAgCiP,UAAhC,EAA4CF,IAA5C,EAAkD;;;;;;MAM3D/O,QAAQ,CAAZ,EAAe;QACPkP,aAAa,IAAIC,QAAQC,eAAZ,CAA4B,IAA5B,EAAkCH,UAAlC,EAA8CF,IAA9C,EAAoDM,KAApD,EAAnB;;;;;;;QAOMC,cAAc,MAAMJ,UAA1B;QACMK,eAAe,EAAE,OAAOD,cAAc,GAArB,CAAF,CAArB;WACOtP,QAAQuP,YAAf;;;SAGK,CAAP;;;ACnBa,SAASC,aAAT,CAAuBjJ,QAAvB,EAAiC3D,OAAjC,EAA0C;;;;;MAKnD5C,QAAQ,CAAZ;;MAEIyC,YAAYnM,IAAZ,CAAiBiQ,SAAS9G,IAAT,EAAjB,CAAJ,EAAuC;QAC/BgQ,gBAAgBhR,SAAS8H,QAAT,EAAmB,EAAnB,CAAtB;;;;QAIIkJ,gBAAgB,CAApB,EAAuB;cACb,CAAC,EAAT;KADF,MAEO;cACG/O,KAAKE,GAAL,CAAS,CAAT,EAAY,KAAK6O,aAAjB,CAAR;;;;;;QAME7M,WAAWA,WAAW6M,aAA1B,EAAyC;eAC9B,EAAT;;;;SAIGzP,KAAP;;;AC5Ba,SAAS0P,eAAT,CAAyB9M,OAAzB,EAAkC+M,IAAlC,EAAwC;;;;MAIjD/M,WAAW,CAAC+M,IAAhB,EAAsB;WACb,EAAP;;;SAGK,CAAP;;;ACRK,IAAMC,aAAW,IAAjB;;;;AAIP,AAAO,IAAM3T,0BAAwB,CACnC,OADmC,EAEnC,SAFmC,EAGnC,SAHmC,EAInC,SAJmC,EAKnC,QALmC,EAMnC,OANmC,EAOnC,OAPmC,EAQnC,OARmC,EASnC,KATmC,EAUnC,OAVmC,EAWnC,MAXmC,EAYnC,QAZmC,EAanC,KAbmC,EAcnC,iBAdmC,CAA9B;AAgBP,AAAO,IAAMC,6BAA2B,IAAIhH,MAAJ,CAAW+G,wBAAsB9G,IAAtB,CAA2B,GAA3B,CAAX,EAA4
C,GAA5C,CAAjC;;;;;AAKP,AAAO,IAAM0a,sBAAoB,IAAI3a,MAAJ,CAAW,4CAAX,EAAyD,GAAzD,CAA1B;;;;AAIP,AAAO,IAAM4a,qBAAmB,IAAI5a,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAzB;;;;AAIP,AAAO,IAAM6a,sBAAoB,IAAI7a,MAAJ,CAAW,yBAAX,EAAsC,GAAtC,CAA1B,CAEP;;ACjCe,SAAS8a,oBAAT,CAA8BjB,IAA9B,EAAoC;;MAE7C7S,2BAAyB5F,IAAzB,CAA8ByY,IAA9B,CAAJ,EAAyC;WAChC,CAAC,EAAR;;;SAGK,CAAP;;;ACAF,SAASkB,SAAT,CAAiBC,KAAjB,EAAwB;UACZA,MAAM3Y,IAAN,CAAW,OAAX,KAAuB,EAAjC,WAAuC2Y,MAAM3Y,IAAN,CAAW,IAAX,KAAoB,EAA3D;;;AAGF,AAAe,SAASwW,gBAAT,CAAwBmC,KAAxB,EAA+B;;;;MAIxCrW,UAAUqW,MAAM7O,MAAN,EAAd;MACI8O,gBAAgB,KAApB;MACIC,gBAAgB,KAApB;MACIpQ,QAAQ,CAAZ;;QAEMhJ,IAAN,CAAWxC,MAAM,CAAN,EAAS,CAAT,CAAX,EAAwByD,OAAxB,CAAgC,YAAM;QAChC4B,QAAQN,MAAR,KAAmB,CAAvB,EAA0B;;;;QAIpB8W,aAAaJ,UAAQpW,OAAR,EAAiB,GAAjB,CAAnB;;;;QAII,CAACsW,aAAD,IAAkBhU,QAAQ7F,IAAR,CAAa+Z,UAAb,CAAtB,EAAgD;sBAC9B,IAAhB;eACS,EAAT;;;;;;QAME,CAACD,aAAD,IAAkBrU,kBAAkBzF,IAAlB,CAAuB+Z,UAAvB,CAAlB,IACEnU,2BAAyB5F,IAAzB,CAA8B+Z,UAA9B,CADN,EACiD;UAC3C,CAACxU,kBAAkBvF,IAAlB,CAAuB+Z,UAAvB,CAAL,EAAyC;wBACvB,IAAhB;iBACS,EAAT;;;;cAIMxW,QAAQwH,MAAR,EAAV;GAzBF;;SA4BOrB,KAAP;;;AC/Ca,SAASsQ,aAAT,CAAuBC,QAAvB,EAAiC;;;MAG1CR,oBAAkBzZ,IAAlB,CAAuBia,QAAvB,CAAJ,EAAsC;WAC7B,CAAC,GAAR;;;SAGK,CAAP;;;ACFa,SAASC,WAAT,CACbzB,IADa,EAEbE,UAFa,EAGbwB,OAHa,EAIbja,SAJa,EAKb+P,QALa,EAMbmK,YANa,EAOb;;MAEIA,aAAanY,IAAb,CAAkB;WAAOwW,SAASxY,GAAhB;GAAlB,MAA2CuR,SAA/C,EAA0D;WACjD,KAAP;;;;;MAKE,CAACiH,IAAD,IAASA,SAASE,UAAlB,IAAgCF,SAAS0B,OAA7C,EAAsD;WAC7C,KAAP;;;MAGM7b,QAZR,GAYqB4B,SAZrB,CAYQ5B,QAZR;;mBAa+B6B,IAAIC,KAAJ,CAAUqY,IAAV,CAb/B;;MAakB4B,QAblB,cAaQ/b,QAbR;;;;MAgBI+b,aAAa/b,QAAjB,EAA2B;WAClB,KAAP;;;;;MAKIgc,WAAW7B,KAAK/M,OAAL,CAAayO,OAAb,EAAsB,EAAtB,CAAjB;MACI,CAACb,WAAStZ,IAAT,CAAcsa,QAAd,CAAL,EAA8B;WACrB,KAAP;;;;;MAKE1U,2BAAyB5F,IAAzB,CAA8BiQ,QAA9B,CAAJ,EAA6C;WACpC,KAAP;;;;MAIEA,SAAShN,MAAT,GAAkB,EAAtB,EAA0B;WACjB,KAAP;;;SAGK,IAAP;;;ACpDa,SAASsX,YAAT,CAAsB9B,IAAtB,EAA4B+B,SAA5B,EAAuC;;;;;MAKhD,CAACA,UAAUxa,IAAV,CAAeyY,IAAf,CAAL,EAA2B;WAClB,CAAC,EAAR;;;SAGK,CAAP;;;ACPa,SAASgC,iBAAT,CAA2BR,QAA3B,EAAqC
;;MAE9CV,oBAAkBvZ,IAAlB,CAAuBia,QAAvB,CAAJ,EAAsC;WAC7B,EAAP;;;SAGK,CAAP;;;ACHa,SAASS,aAAT,CAAuBT,QAAvB,EAAiC;;MAE1CT,mBAAiBxZ,IAAjB,CAAsBia,QAAtB,CAAJ,EAAqC;;;;;QAK/BV,oBAAkBvZ,IAAlB,CAAuBia,QAAvB,CAAJ,EAAsC;aAC7B,CAAC,EAAR;;;;SAIG,CAAP;;;ACKK,SAASU,aAAT,CAAuBR,OAAvB,EAAgC;SAC9B,IAAIvb,MAAJ,OAAeub,OAAf,EAA0B,GAA1B,CAAP;;;AAGF,SAASR,OAAT,CAAiBC,KAAjB,EAAwB3J,QAAxB,EAAkC;UACtBA,YAAY2J,MAAM1Q,IAAN,EAAtB,WAAsC0Q,MAAM3Y,IAAN,CAAW,OAAX,KAAuB,EAA7D,WAAmE2Y,MAAM3Y,IAAN,CAAW,IAAX,KAAoB,EAAvF;;;AAGF,AAAe,SAAS2Z,UAAT,OAOZ;MANDC,KAMC,QANDA,KAMC;MALDlC,UAKC,QALDA,UAKC;MAJDwB,OAIC,QAJDA,OAIC;MAHDja,SAGC,QAHDA,SAGC;MAFDO,CAEC,QAFDA,CAEC;+BADD2Z,YACC;MADDA,YACC,qCADc,EACd;;cACWla,aAAaC,IAAIC,KAAJ,CAAUuY,UAAV,CAAzB;MACM6B,YAAYG,cAAcR,OAAd,CAAlB;MACMd,OAAO3H,YAAYjR,CAAZ,CAAb;;;;;;;;;MASMqa,cAAcD,MAAMhS,MAAN,CAAa,UAACkS,aAAD,EAAgBC,IAAhB,EAAyB;;;;QAIlDvC,OAAOlM,aAAayO,KAAKtZ,OAAL,CAAa+W,IAA1B,CAAb;QACMmB,QAAQnZ,EAAEua,IAAF,CAAd;QACM/K,WAAW2J,MAAM1Q,IAAN,EAAjB;;QAEI,CAACgR,YAAYzB,IAAZ,EAAkBE,UAAlB,EAA8BwB,OAA9B,EAAuCja,SAAvC,EAAkD+P,QAAlD,EAA4DmK,YAA5D,CAAL,EAAgF;aACvEW,aAAP;;;;QAIE,CAACA,cAActC,IAAd,CAAL,EAA0B;oBACVA,IAAd,IAAsB;eACb,CADa;0BAAA;;OAAtB;KADF,MAMO;oBACSA,IAAd,EAAoBxI,QAApB,GAAkC8K,cAActC,IAAd,EAAoBxI,QAAtD,SAAkEA,QAAlE;;;QAGIgL,eAAeF,cAActC,IAAd,CAArB;QACMwB,WAAWN,QAAQC,KAAR,EAAe3J,QAAf,CAAjB;QACM3D,UAAUF,eAAeqM,IAAf,CAAhB;;QAEI/O,QAAQ6Q,aAAa9B,IAAb,EAAmB+B,SAAnB,CAAZ;aACSC,kBAAkBR,QAAlB,CAAT;aACSS,cAAcT,QAAd,CAAT;aACSD,cAAcC,QAAd,CAAT;aACSxC,iBAAemC,KAAf,CAAT;aACSF,qBAAqBjB,IAArB,CAAT;aACSW,gBAAgB9M,OAAhB,EAAyB+M,IAAzB,CAAT;aACSH,cAAcjJ,QAAd,EAAwB3D,OAAxB,CAAT;aACSoM,gBAAgBhP,KAAhB,EAAuBiP,UAAvB,EAAmCF,IAAnC,CAAT;;iBAEa/O,KAAb,GAAqBA,KAArB;;WAEOqR,aAAP;GAvCkB,EAwCjB,EAxCiB,CAApB;;SA0COjT,QAAQrG,OAAR,CAAgBqZ,WAAhB,EAA6B7X,MAA7B,KAAwC,CAAxC,GAA4C,IAA5C,GAAmD6X,WAA1D;;;AClFF;;AAEA,IAAMI,8BAA8B;SAAA,yBACgB;QAAxCza,CAAwC,QAAxCA,CAAwC;QAArCR,GAAqC,QAArCA,GAAqC;QAAhCC,SAAgC,QAAhCA,SAAgC;iCAArBka,YAAqB;QAArBA,YAAqB,qCAAN,EAAM;;gBACpCla,aAAaC,IAAIC,KAAJ,CAAUH,GAAV,CAAzB;;QAEM0Y,a
AAapM,aAAatM,GAAb,CAAnB;QACMka,UAAUrN,eAAe7M,GAAf,EAAoBC,SAApB,CAAhB;;QAEM2a,QAAQpa,EAAE,SAAF,EAAakQ,OAAb,EAAd;;QAEMwK,cAAcP,WAAW;kBAAA;4BAAA;sBAAA;0BAAA;UAAA;;KAAX,CAApB;;;QAUI,CAACO,WAAL,EAAkB,OAAO,IAAP;;;;QAIZC,UAAUtT,QAAQrG,OAAR,CAAgB0Z,WAAhB,EAA6BtS,MAA7B,CAAoC,UAACC,GAAD,EAAMkS,IAAN,EAAe;UAC3DK,aAAaF,YAAYH,IAAZ,CAAnB;aACOK,WAAW3R,KAAX,GAAmBZ,IAAIY,KAAvB,GAA+B2R,UAA/B,GAA4CvS,GAAnD;KAFc,EAGb,EAAEY,OAAO,CAAC,GAAV,EAHa,CAAhB;;;;QAOI0R,QAAQ1R,KAAR,IAAiB,EAArB,EAAyB;aAChB0R,QAAQ3C,IAAf;;;WAGK,IAAP;;CAlCJ,CAuCA;;AClDO,IAAM6C,2BAA2B,CACtC,QADsC,CAAjC;;ACKP,SAASC,WAAT,CAAqBtb,GAArB,EAA0B;MAClBC,YAAYC,IAAIC,KAAJ,CAAUH,GAAV,CAAlB;MACQ3B,QAFgB,GAEH4B,SAFG,CAEhB5B,QAFgB;;SAGjBA,QAAP;;;AAGF,SAASmE,MAAT,CAAgBxC,GAAhB,EAAqB;SACZ;YAAA;YAEGsb,YAAYtb,GAAZ;GAFV;;;AAMF,IAAMub,sBAAsB;SAAA,yBACK;QAArB/a,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAb8V,SAAa,QAAbA,SAAa;;QACvB0F,aAAahb,EAAE,qBAAF,CAAnB;QACIgb,WAAWxY,MAAX,KAAsB,CAA1B,EAA6B;UACrBwV,OAAOgD,WAAWxa,IAAX,CAAgB,MAAhB,CAAb;UACIwX,IAAJ,EAAU;eACDhW,OAAOgW,IAAP,CAAP;;;;QAIEiD,UAAUvL,gBAAgB1P,CAAhB,EAAmB6a,wBAAnB,EAA6CvF,SAA7C,CAAhB;QACI2F,OAAJ,EAAa;aACJjZ,OAAOiZ,OAAP,CAAP;;;WAGKjZ,OAAOxC,GAAP,CAAP;;CAfJ,CAoBA;;ACtCO,IAAM0b,yBAAyB,CACpC,gBADoC,EAEpC,qBAFoC,CAA/B;;ACSA,SAAStZ,OAAT,CAAeM,OAAf,EAAwBlC,CAAxB,EAA4C;MAAjBmb,SAAiB,yDAAL,GAAK;;YACvCjZ,QAAQ+I,OAAR,CAAgB,UAAhB,EAA4B,GAA5B,EAAiCvC,IAAjC,EAAV;SACO0S,UAAUlZ,OAAV,EAAmBiZ,SAAnB,EAA8B,EAAEE,SAAS,UAAX,EAA9B,CAAP;;;AAGF,IAAMC,0BAA0B;SAAA,yBACK;QAAzBtb,CAAyB,QAAzBA,CAAyB;QAAtBkC,OAAsB,QAAtBA,OAAsB;QAAboT,SAAa,QAAbA,SAAa;;QAC3BiG,UAAU7L,gBAAgB1P,CAAhB,EAAmBkb,sBAAnB,EAA2C5F,SAA3C,CAAhB;QACIiG,OAAJ,EAAa;aACJ3Z,QAAMwO,UAAUmL,OAAV,EAAmBvb,CAAnB,CAAN,CAAP;;;QAGImb,YAAY,GAAlB;QACMK,eAAetZ,QAAQ6H,KAAR,CAAc,CAAd,EAAiBoR,YAAY,CAA7B,CAArB;WACOvZ,QAAM5B,EAAEwb,YAAF,EAAgB/S,IAAhB,EAAN,EAA8BzI,CAA9B,EAAiCmb,SAAjC,CAAP;;CATJ,CAaA;;ACvBA,IAAMM,4BAA4B;SAAA,yBACX;QAAXvZ,OAAW,QAAXA,OAAW;;QACblC,IAAIoC,QAAQC,IAAR,CAAaH,OAAb,CAAV;;QAEMuG,OAAOuC,gBAAgBhL,EAAE,KAAF,EAASgO,KAAT,GAAiBvF,IAAjB,EAAhB,CAA
b;WACOA,KAAKsD,KAAL,CAAW,IAAX,EAAiBvJ,MAAxB;;CALJ,CASA;;ACCA,IAAMkZ,mBAAmB;;UAEf,GAFe;SAGhBrG,sBAAsBsG,OAHN;kBAIPzF,8BAA8ByF,OAJvB;UAKf/F,uBAAuB+F,OALR;WAMd/G,wBAAwB+G,OAAxB,CAAgCC,IAAhC,CAAqChH,uBAArC,CANc;kBAOP8C,6BAA6BiE,OAPtB;OAQlBvF,oBAAoBuF,OARF;iBASRlB,4BAA4BkB,OATpB;kBAUPZ,oBAAoBY,OAVb;WAWdL,wBAAwBK,OAXV;cAYXF,0BAA0BE,OAZf;aAaZ;QAAG9M,KAAH,QAAGA,KAAH;WAAegN,gBAAgBC,YAAhB,CAA6BjN,KAA7B,CAAf;GAbY;;SAAA,mBAeftQ,OAfe,EAeN;QACPoS,IADO,GACEpS,OADF,CACPoS,IADO;;;QAGXA,IAAJ,EAAU;UACF3Q,IAAIoC,QAAQC,IAAR,CAAasO,IAAb,CAAV;cACQ3Q,CAAR,GAAYA,CAAZ;;;QAGI6O,QAAQ,KAAKA,KAAL,CAAWtQ,OAAX,CAAd;QACMwd,iBAAiB,KAAKA,cAAL,CAAoBxd,OAApB,CAAvB;QACM0T,SAAS,KAAKA,MAAL,CAAY1T,OAAZ,CAAf;QACM2D,UAAU,KAAKA,OAAL,cAAkB3D,OAAlB,IAA2BsQ,YAA3B,IAAhB;QACMmN,iBAAiB,KAAKA,cAAL,cAAyBzd,OAAzB,IAAkC2D,gBAAlC,IAAvB;QACMoQ,MAAM,KAAKA,GAAL,cAAc/T,OAAd,IAAuB2D,gBAAvB,IAAZ;QACM+Z,gBAAgB,KAAKA,aAAL,CAAmB1d,OAAnB,CAAtB;QACMgd,UAAU,KAAKA,OAAL,cAAkBhd,OAAlB,IAA2B2D,gBAA3B,IAAhB;QACMga,aAAa,KAAKA,UAAL,cAAqB3d,OAArB,IAA8B2D,gBAA9B,IAAnB;QACMia,YAAY,KAAKA,SAAL,CAAe,EAAEtN,YAAF,EAAf,CAAlB;;0BACwB,KAAKuN,cAAL,CAAoB7d,OAApB,CAlBT;;QAkBPiB,GAlBO,mBAkBPA,GAlBO;QAkBF6c,MAlBE,mBAkBFA,MAlBE;;;WAoBR;kBAAA;oBAAA;sBAGWN,kBAAkB,IAH7B;cAAA;oCAAA;sBAAA;kCAAA;cAAA;oBAAA;sBAAA;4BAAA;;KAAP;;CAnCJ,CAoDA;;AC7De,SAASO,YAAT,CAAsB9c,GAAtB,EAA2BC,SAA3B,EAAsC;cACvCA,aAAaC,IAAIC,KAAJ,CAAUH,GAAV,CAAzB;mBACqBC,SAF8B;MAE3C5B,QAF2C,cAE3CA,QAF2C;;MAG7C0e,aAAa1e,SAASkO,KAAT,CAAe,GAAf,EAAoBhC,KAApB,CAA0B,CAAC,CAA3B,EAA8B3L,IAA9B,CAAmC,GAAnC,CAAnB;;SAEOkF,WAAWzF,QAAX,KAAwByF,WAAWiZ,UAAX,CAAxB,IAAkDb,gBAAzD;;;ACVK,IAAMc,UAAU,cAAhB;;ACOP;AACA,AAAO,SAASC,gBAAT,CAA0BrN,QAA1B,EAAoCpP,CAApC,QAAkD;MAAT4B,KAAS,QAATA,KAAS;;MACnD,CAACA,KAAL,EAAY,OAAO,IAAP;;IAEVA,MAAMxD,IAAN,CAAW,GAAX,CAAF,EAAmBgR,QAAnB,EAA6BzN,MAA7B;;SAEOyN,QAAP;;;;AAIF,AAAO,SAASsN,iBAAT,CAA2BtN,QAA3B,EAAqCpP,CAArC,SAAwD;MAAd2c,UAAc,SAAdA,UAAc;;MACzD,CAACA,UAAL,EAAiB,OAAO,IAAP;;UAET3b,OAAR,CAAgB2b,UAAhB,EAA4Bzb,OAA5B,CAAoC,UAACoG,GAAD,EAAS;QACrCsV,WAAW5c,EAAEsH,GAAF,EAAO8H,QAAP,CAAjB;Q
ACM7O,QAAQoc,WAAWrV,GAAX,CAAd;;;QAGI,OAAO/G,KAAP,KAAiB,QAArB,EAA+B;eACpBJ,IAAT,CAAc,UAACiB,KAAD,EAAQf,IAAR,EAAiB;sBACfL,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0B2c,WAAWrV,GAAX,CAA1B;OADF;KADF,MAIO,IAAI,OAAO/G,KAAP,KAAiB,UAArB,EAAiC;;eAE7BJ,IAAT,CAAc,UAACiB,KAAD,EAAQf,IAAR,EAAiB;YACvB2B,SAASzB,MAAMP,EAAEK,IAAF,CAAN,EAAeL,CAAf,CAAf;;YAEI,OAAOgC,MAAP,KAAkB,QAAtB,EAAgC;wBAChBhC,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0BgC,MAA1B;;OAJJ;;GAXJ;;SAqBOoN,QAAP;;;AAGF,AAAO,SAASyN,MAAT,CAAgBnI,IAAhB,EAAsB;MACnB1U,CADmB,GAC8B0U,IAD9B,CACnB1U,CADmB;MAChBqB,IADgB,GAC8BqT,IAD9B,CAChBrT,IADgB;MACVyb,cADU,GAC8BpI,IAD9B,CACVoI,cADU;0BAC8BpI,IAD9B,CACMqI,WADN;MACMA,WADN,qCACoB,KADpB;;;MAGvB,CAACD,cAAL,EAAqB,OAAO,IAAP;;;;MAIjB,OAAOA,cAAP,KAA0B,QAA9B,EAAwC,OAAOA,cAAP;;MAEhCrM,SATmB,GASkBqM,cATlB,CASnBrM,SATmB;8BASkBqM,cATlB,CASRE,cATQ;MASRA,cATQ,yCASS,IATT;;;MAWrBC,mBAAmBxM,UAAUjP,IAAV,CAAe;WAAYxB,EAAE4D,QAAF,EAAYpB,MAAZ,KAAuB,CAAvB,IAA4BxC,EAAE4D,QAAF,EAAY6E,IAAZ,GAAmBC,IAAnB,OAA8B,EAAtE;GAAf,CAAzB;;MAEI,CAACuU,gBAAL,EAAuB,OAAO,IAAP;;;;;;;;MAQnBF,WAAJ,EAAiB;QACX3N,WAAWpP,EAAEid,gBAAF,CAAf;;;aAGSC,IAAT,CAAcld,EAAE,aAAF,CAAd;eACWoP,SAAS9E,MAAT,EAAX;;eAEWoS,kBAAkBtN,QAAlB,EAA4BpP,CAA5B,EAA+B8c,cAA/B,CAAX;eACWL,iBAAiBrN,QAAjB,EAA2BpP,CAA3B,EAA8B8c,cAA9B,CAAX;;QAEIE,cAAJ,EAAoB;iBACP1I,SAASjT,IAAT,EAAe+N,QAAf,EAAyBsF,IAAzB,CAAX;;;WAGK1U,EAAE2Q,IAAF,CAAOvB,QAAP,CAAP;;;;MAII5O,OAAOyc,iBAAiB5T,KAAjB,CAAuBmT,OAAvB,CAAb;MACIxa,eAAJ;;MAEIxB,IAAJ,EAAU;aACCR,EAAEid,gBAAF,EAAoBzc,IAApB,CAAyBA,KAAK,CAAL,CAAzB,CAAT;GADF,MAEO;;aAEIR,EAAEid,gBAAF,EAAoBxU,IAApB,EAAT;;;;;MAKEuU,cAAJ,EAAoB;WACX1I,SAASjT,IAAT,EAAeW,MAAf,EAAuB0S,IAAvB,CAAP;;;SAGK1S,MAAP;;;AAGF,SAASmb,aAAT,CAAuBzI,IAAvB,EAA6B;MACnBrT,IADmB,GACCqT,IADD,CACnBrT,IADmB;MACb+b,SADa,GACC1I,IADD,CACb0I,SADa;;;;;SAKpBP,oBAAYnI,IAAZ,IAAkBoI,gBAAgBM,UAAU/b,IAAV,CAAlC,QACLqa,iBAAiBra,IAAjB,EAAuBqT,IAAvB,CADF;;;AAIF,IAAM2I,gBAAgB;SAAA,qBACwB;QAApCD,SAAoC,yDAAxB1B,gBAAwB;QAANhH,IAAM;gBACFA,IADE;QAClC4I,WADkC,SAClCA,WADkC;QACrBC,cADqB,SACrBA,cADqB;;;QAGtCH,UAAUf,MAAV,KAAqB,
GAAzB,EAA8B,OAAOe,UAAUzB,OAAV,CAAkBjH,IAAlB,CAAP;;wBAGzBA,IADL;;;;QAKI4I,WAAJ,EAAiB;UACTpb,WAAUib,2BACXzI,IADW,IACLrT,MAAM,SADD,EACY0b,aAAa,IADzB,EAC+BlO,OAAO0O;SADtD;aAGO;;OAAP;;QAII1O,QAAQsO,2BAAmBzI,IAAnB,IAAyBrT,MAAM,OAA/B,IAAd;QACM0a,iBAAiBoB,2BAAmBzI,IAAnB,IAAyBrT,MAAM,gBAA/B,IAAvB;QACM4Q,SAASkL,2BAAmBzI,IAAnB,IAAyBrT,MAAM,QAA/B,IAAf;QACM4a,gBAAgBkB,2BAAmBzI,IAAnB,IAAyBrT,MAAM,eAA/B,IAAtB;QACMa,UAAUib,2BACXzI,IADW,IACLrT,MAAM,SADD,EACY0b,aAAa,IADzB,EAC+BlO;OAD/C;QAGMmN,iBAAiBmB,2BAAmBzI,IAAnB,IAAyBrT,MAAM,gBAA/B,EAAiDa,gBAAjD,IAAvB;QACMoQ,MAAM6K,2BAAmBzI,IAAnB,IAAyBrT,MAAM,KAA/B,EAAsCa,gBAAtC,IAAZ;QACMqZ,UAAU4B,2BAAmBzI,IAAnB,IAAyBrT,MAAM,SAA/B,EAA0Ca,gBAA1C,IAAhB;QACMga,aAAaiB,2BAAmBzI,IAAnB,IAAyBrT,MAAM,YAA/B,EAA6Ca,gBAA7C,IAAnB;QACMia,YAAYgB,2BAAmBzI,IAAnB,IAAyBrT,MAAM,WAA/B,EAA4CwN,YAA5C,IAAlB;;yBACwBsO,2BAAmBzI,IAAnB,IAAyBrT,MAAM,gBAA/B,IA9BkB;;QA8BlC7B,GA9BkC,kBA8BlCA,GA9BkC;QA8B7B6c,MA9B6B,kBA8B7BA,MA9B6B;;;WAgCnC;kBAAA;sBAAA;oBAAA;oCAAA;oCAAA;cAAA;kCAAA;cAAA;oBAAA;sBAAA;4BAAA;;KAAP;;CAjCJ,CAkDA;;AC5JA;sDAAe;QAEXJ,aAFW,SAEXA,aAFW;QAGXtL,IAHW,SAGXA,IAHW;QAIX3Q,CAJW,SAIXA,CAJW;QAKXsV,SALW,SAKXA,SALW;QAMXtT,MANW,SAMXA,MANW;QAOXwb,SAPW,SAOXA,SAPW;QAQX3O,KARW,SAQXA,KARW;QASXrP,GATW,SASXA,GATW;;;;;;;iBAAA,GAaD,CAbC;wBAAA,GAcQ,CAACsM,aAAatM,GAAb,CAAD,CAdR;;;;;;kBAkBNyc,iBAAiBwB,QAAQ,EAlBnB;;;;;qBAmBF,CAAT;;mBACU5b,SAAS6b,MAAT,CAAgBzB,aAAhB,CApBC;;;aAAA;;mBAqBJjc,EAAE2Q,IAAF,EAAP;;yBArBW,GAuBW;mBACfsL,aADe;wBAAA;kBAAA;kCAAA;2BAKP,IALO;8BAMJpN,KANI;;aAvBX;0BAAA,GAiCYwO,cAAc1B,OAAd,CAAsB6B,SAAtB,EAAiCG,aAAjC,CAjCZ;;;yBAmCE5Q,IAAb,CAAkBkP,aAAlB;kCAEKja,MADL;sCAGMA,OAAOE,OADX,yCAGaub,KAHb,uBAIIG,eAAe1b,OAJnB;;;4BAQc0b,eAAe3B,aAA/B;;;;;0DAIGja,MAlDQ;2BAmDEyb,KAnDF;8BAoDKA;;;;;;;;;GApDpB;;WAA8BI,eAA9B;;;;SAA8BA,eAA9B;;;ACMA,IAAMC,OAAO;OAAA,iBACCte,GADD,EACMmR,IADN,EACuB;;;QAAX+D,IAAW,yDAAJ,EAAI;;;;;;;;qBACCA,QAAQ,IADT;wCACxBqJ,aADwB;2BAAA,sCACR,IADQ;uBAAA,GAGdre,IAAIC,KAAJ,CAAUH,GAAV,CAHc;;kBAK3B5B,YAAY6B,SAAZ,CAL2B;;;;;+CAMvB3B,OAAO+B,MANgB;;;uBAAA,GASdyc,aAAa9c,GAA
b,EAAkBC,SAAlB,CATc;;;;qBAYhBoC,SAAS6b,MAAT,CAAgBle,GAAhB,EAAqBmR,IAArB,EAA2BlR,SAA3B,CAZgB;;;eAAA;;mBAe5BO,EAAEb,KAf0B;;;;;+CAgBvBa,CAhBuB;;;;qBAmBzBA,EAAE2Q,IAAF,EAAP;;;;uBAnBgC,GAuBd3Q,EAAE,MAAF,EAAU2D,GAAV,CAAc,UAACvD,CAAD,EAAIC,IAAJ;uBAAaL,EAAEK,IAAF,EAAQG,IAAR,CAAa,MAAb,CAAb;eAAd,EAAiD0P,OAAjD,EAvBc;oBAAA,GAyBnBmN,cAAc1B,OAAd,CAAsB6B,SAAtB,EAAiC,EAAEhe,QAAF,EAAOmR,UAAP,EAAa3Q,IAAb,EAAgBsV,oBAAhB,EAA2B7V,oBAA3B,EAAjC,CAzBmB;wBA0BCuC,MA1BD;mBAAA,WA0BxB6M,KA1BwB;2BAAA,WA0BjBoN,aA1BiB;;;;oBA6B5B8B,iBAAiB9B,aA7BW;;;;;;qBA8Bf4B,gBACb;oCAAA;4CAAA;0BAAA;oBAAA;oCAAA;8BAAA;4BAAA;;eADa,CA9Be;;;oBAAA;;;;;oCA4CzB7b,MADL;6BAEe,CAFf;gCAGkB;;;;+CAIbA,MAlDyB;;;;;;;;;;CADpC,CAwDA;;"}
\ No newline at end of file
diff --git a/dist/mercury.js b/dist/mercury.js
index 85f5be5d..effdae58 100644
--- a/dist/mercury.js
+++ b/dist/mercury.js
@@ -10,13 +10,13 @@ var cheerio = _interopDefault(require('cheerio'));
var _Promise = _interopDefault(require('babel-runtime/core-js/promise'));
var request = _interopDefault(require('request'));
var _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));
-var _Object$keys = _interopDefault(require('babel-runtime/core-js/object/keys'));
var _toConsumableArray = _interopDefault(require('babel-runtime/helpers/toConsumableArray'));
-var _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));
-var stringDirection = _interopDefault(require('string-direction'));
-var _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));
var _defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));
+var _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));
var _typeof = _interopDefault(require('babel-runtime/helpers/typeof'));
+var _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));
+var _Object$keys = _interopDefault(require('babel-runtime/core-js/object/keys'));
+var stringDirection = _interopDefault(require('string-direction'));
var validUrl = _interopDefault(require('valid-url'));
var moment = _interopDefault(require('moment'));
var wuzzy = _interopDefault(require('wuzzy'));
@@ -68,8 +68,9 @@ var Errors = {
}
};
-var REQUEST_HEADERS = {
- 'User-Agent': 'Readability - http://readability.com/about/'
+// Browser does not like us setting user agent
+var REQUEST_HEADERS = cheerio.browser ? {} : {
+ 'User-Agent': 'Mercury - https://mercury.postlight.com/web-parser/'
};
// The number of milliseconds to attempt to fetch a resource before timing out.
@@ -161,7 +162,7 @@ var fetchResource$1 = (function () {
parsedUrl = parsedUrl || URL.parse(encodeURI(url));
options = {
- url: parsedUrl,
+ url: parsedUrl.href,
headers: _extends({}, REQUEST_HEADERS),
timeout: FETCH_TIMEOUT,
// Don't set encoding; fixes issues
@@ -234,2412 +235,2480 @@ function normalizeMetaTags($) {
return $;
}
-var IS_LINK = new RegExp('https?://', 'i');
-var IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');
+// Spacer images to be removed
+var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
-var TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');
+// The class we will use to mark elements we want to keep
+// but would normally remove
+var KEEP_CLASS = 'mercury-parser-keep';
-// Convert all instances of images with potentially
-// lazy loaded images into normal images.
-// Many sites will have img tags with no source, or an image tag with a src
-// attribute that a is a placeholer. We need to be able to properly fill in
-// the src attribute so the images are no longer lazy loaded.
-function convertLazyLoadedImages($) {
- $('img').each(function (_, img) {
- _Reflect$ownKeys(img.attribs).forEach(function (attr) {
- var value = img.attribs[attr];
+var KEEP_SELECTORS = ['iframe[src^="https://www.youtube.com"]', 'iframe[src^="http://www.youtube.com"]', 'iframe[src^="https://player.vimeo"]', 'iframe[src^="http://player.vimeo"]'];
- if (attr !== 'src' && IS_LINK.test(value) && IS_IMAGE.test(value)) {
- $(img).attr('src', value);
- }
- });
- });
+// A list of tags to strip from the output if we encounter them.
+var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
- return $;
-}
+// cleanAttributes
+var REMOVE_ATTRS = ['style', 'align'];
+var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
+ return '[' + selector + ']';
+});
+var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
+var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
+var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
-function isComment(index, node) {
- return node.type === 'comment';
-}
+// removeEmpty
+var REMOVE_EMPTY_TAGS = ['p'];
+var REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {
+ return tag + ':empty';
+}).join(',');
-function cleanComments($) {
- $.root().find('*').contents().filter(isComment).remove();
+// cleanTags
+var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
- return $;
-}
+// cleanHeaders
+var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
+var HEADER_TAG_LIST = HEADER_TAGS.join(',');
-function clean($) {
- $(TAGS_TO_REMOVE).remove();
+// // CONTENT FETCHING CONSTANTS ////
- $ = cleanComments($);
- return $;
-}
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
+// 'form', // This is too generic, has too many false positives
+'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
-var Resource = {
+// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
+//
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
- // Create a Resource.
- //
- // :param url: The URL for the document we should retrieve.
- // :param response: If set, use as the response rather than
- // attempting to fetch it ourselves. Expects a
- // string.
- create: function create(url, preparedResponse, parsedUrl) {
- var _this = this;
+// A list of tags which, if found inside, should cause a <div /> to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into <p /> tags.
+var DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
- return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {
- var result, validResponse;
- return _regeneratorRuntime.wrap(function _callee$(_context) {
- while (1) {
- switch (_context.prev = _context.next) {
- case 0:
- result = void 0;
+// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
- if (!preparedResponse) {
- _context.next = 6;
- break;
- }
- validResponse = {
- statusMessage: 'OK',
- statusCode: 200,
- headers: {
- 'content-type': 'text/html',
- 'content-length': 500
- }
- };
- result = { body: preparedResponse, response: validResponse };
- _context.next = 9;
- break;
+// A list of selectors that specify, very clearly, either hNews or other
+// very content-specific style content, like Blogger templates.
+// More examples here: http://microformats.org/wiki/blog-post-formats
- case 6:
- _context.next = 8;
- return fetchResource$1(url, parsedUrl);
- case 8:
- result = _context.sent;
- case 9:
- if (!result.error) {
- _context.next = 11;
- break;
- }
- return _context.abrupt('return', result);
- case 11:
- return _context.abrupt('return', _this.generateDoc(result));
+// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
+'\\Bcopy'];
- case 12:
- case 'end':
- return _context.stop();
- }
- }
- }, _callee, _this);
- }))();
- },
- generateDoc: function generateDoc(_ref) {
- var content = _ref.body,
- response = _ref.response;
- var contentType = response.headers['content-type'];
+// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');
- // TODO: Implement is_text function from
- // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
+// Readability publisher-specific guidelines
- if (!contentType.includes('html') && !contentType.includes('text')) {
- throw new Error('Content does not appear to be text.');
- }
- var $ = cheerio.load(content, { normalizeWhitespace: true });
+// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
+'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
+'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
+'promo', 'pr_', // autoblog - press release
+'related', 'respond', 'roundcontent', // lifehacker restricted content warning
+'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
+// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');
- if ($.root().children().length === 0) {
- throw new Error('No children, likely a bad parse.');
- }
+// CSS selector to try to determine if a page is WordPress. Not always successful.
+var IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';
- $ = normalizeMetaTags($);
- $ = convertLazyLoadedImages($);
- $ = clean($);
+// Match a digit. Pretty clear.
- return $;
- }
-};
-var merge = function merge(extractor, domains) {
- return domains.reduce(function (acc, domain) {
- acc[domain] = extractor;
- return acc;
- }, {});
-};
+// A list of words that, if found in link text or URLs, likely mean that
+// this link is not a next page link.
-function mergeSupportedDomains(extractor) {
- return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
-}
-var BloggerExtractor = {
- domain: 'blogspot.com',
- content: {
- // Blogger is insane and does not load its content
- // initially in the page, but it's all there
- // in noscript
- selectors: ['.post-content noscript'],
- // Selectors to remove from the extracted content
- clean: [],
+// Match any phrase that looks like it could be page, or paging, or pagination
+var PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');
- // Convert the noscript tag to a div
- transforms: {
- noscript: 'div'
- }
- },
+// Match any link text/classname/id that looks like it could mean the next
+// page. Things like: next, continue, >, >>, » but not >|, »| as those can
+// mean last page.
+// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
- author: {
- selectors: ['.post-author-name']
- },
- title: {
- selectors: ['.post h2.title']
- },
+// Match any link text/classname/id that looks like it is an end link: things
+// like "first", "last", "end", etc.
- date_published: {
- selectors: ['span.publishdate']
- }
-};
-var NYMagExtractor = {
- domain: 'nymag.com',
- content: {
- // Order by most likely. Extractor will stop on first occurrence
- selectors: ['div.article-content', 'section.body', 'article.article'],
+// Match any link text/classname/id that looks like it means the previous
+// page.
- // Selectors to remove from the extracted content
- clean: ['.ad', '.single-related-story'],
- // Object of tranformations to make on matched elements
- // Each key is the selector, each value is the tag to
- // transform to.
- // If a function is given, it should return a string
- // to convert to or nothing (in which case it will not perform
- // the transformation.
- transforms: {
- // Convert h1s to h2s
- h1: 'h2',
+// Match 2 or more consecutive <br /> tags
- // Convert lazy-loaded noscript images to figures
- noscript: function noscript($node) {
- var $children = $node.children();
- if ($children.length === 1 && $children.get(0).tagName === 'img') {
- return 'figure';
- }
- return null;
- }
- }
- },
-
- title: {
- selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']
- },
+// Match 1 BR tag.
- author: {
- selectors: ['.by-authors', '.lede-feature-author']
- },
- dek: {
- selectors: ['.lede-feature-teaser']
- },
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
+var BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
+var BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');
- date_published: {
- selectors: [['time.article-timestamp[datetime]', 'datetime'], 'time.article-timestamp']
- }
-};
+// The removal is implemented as a blacklist and whitelist, this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression-both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
+var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
-var WikipediaExtractor = {
- domain: 'wikipedia.org',
- content: {
- selectors: ['#mw-content-text'],
+var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
+var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
- defaultCleaner: false,
+function stripUnlikelyCandidates($) {
+ // Loop through the provided document and remove any non-link nodes
+ // that are unlikely candidates for article content.
+ //
+ // Links are ignored because there are very often links to content
+ // that are identified as non-body-content, but may be inside
+ // article-like content.
+ //
+ // :param $: a cheerio object to strip nodes from
+ // :return $: the cleaned cheerio object
+ $('*').not('a').each(function (index, node) {
+ var $node = $(node);
+ var classes = $node.attr('class');
+ var id = $node.attr('id');
+ if (!id && !classes) return;
- // transform top infobox to an image with caption
- transforms: {
- '.infobox img': function infoboxImg($node) {
- var $parent = $node.parents('.infobox');
- // Only prepend the first image in .infobox
- if ($parent.children('img').length === 0) {
- $parent.prepend($node);
- }
- },
- '.infobox caption': 'figcaption',
- '.infobox': 'figure'
- },
+ var classAndId = (classes || '') + ' ' + (id || '');
+ if (CANDIDATES_WHITELIST.test(classAndId)) {
+ return;
+ } else if (CANDIDATES_BLACKLIST.test(classAndId)) {
+ $node.remove();
+ }
+ });
- // Selectors to remove from the extracted content
- clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc', '.navbox']
+ return $;
+}
- },
+// ## NOTES:
+// Another good candidate for refactoring/optimizing.
+// Very imperative code, I don't love it. - AP
- author: 'Wikipedia Contributors',
+// Given cheerio object, convert consecutive <br /> tags into
+// <p /> tags instead.
+//
+// :param $: A cheerio object
- title: {
- selectors: ['h2.title']
- },
+function brsToPs$$1($) {
+ var collapsing = false;
+ $('br').each(function (index, element) {
+ var $element = $(element);
+ var nextElement = $element.next().get(0);
- date_published: {
- selectors: ['#footer-info-lastmod']
- }
+ if (nextElement && nextElement.tagName.toLowerCase() === 'br') {
+ collapsing = true;
+ $element.remove();
+ } else if (collapsing) {
+ collapsing = false;
+ // $(element).replaceWith('')
+ paragraphize(element, $, true);
+ }
+ });
-};
+ return $;
+}
-var TwitterExtractor = {
- domain: 'twitter.com',
+// Given a node, turn it into a P if it is not already a P, and
+// make sure it conforms to the constraints of a P tag (I.E. does
+// not contain any other block tags.)
+//
+// If the node is a <br />, it treats the following inline siblings
+// as if they were its children.
+//
+// :param node: The node to paragraphize; this is a raw node
+// :param $: The cheerio object to handle dom manipulation
+// :param br: Whether or not the passed node is a br
- content: {
- transforms: {
- // We're transforming essentially the whole page here.
- // Twitter doesn't have nice selectors, so our initial
- // selector grabs the whole page, then we're re-writing
- // it to fit our needs before we clean it up.
- '.permalink[role=main]': function permalinkRoleMain($node, $) {
- var tweets = $node.find('.tweet');
- var $tweetContainer = $('');
- $tweetContainer.append(tweets);
- $node.replaceWith($tweetContainer);
- },
+function paragraphize(node, $) {
+ var br = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
- // Twitter wraps @ with s, which
- // renders as a strikethrough
- s: 'span'
- },
+ var $node = $(node);
- selectors: ['.permalink[role=main]'],
+ if (br) {
+ var sibling = node.nextSibling;
+ var p = $('');
- defaultCleaner: false,
+ // while the next node is text or not a block level element
+ // append it to a new p node
+ while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {
+ var nextSibling = sibling.nextSibling;
+ $(sibling).appendTo(p);
+ sibling = nextSibling;
+ }
- clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']
- },
+ $node.replaceWith(p);
+ $node.remove();
+ return $;
+ }
- author: {
- selectors: ['.tweet.permalink-tweet .username']
- },
+ return $;
+}
- date_published: {
- selectors: [['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms']]
- }
+function convertDivs($) {
+ $('div').each(function (index, div) {
+ var $div = $(div);
+ var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
-};
+ if (convertable) {
+ convertNodeTo$$1($div, $, 'p');
+ }
+ });
-var NYTimesExtractor = {
- domain: 'www.nytimes.com',
+ return $;
+}
- title: {
- selectors: ['.g-headline', 'h1.headline']
- },
+function convertSpans($) {
+ $('span').each(function (index, span) {
+ var $span = $(span);
+ var convertable = $span.parents('p, div').length === 0;
+ if (convertable) {
+ convertNodeTo$$1($span, $, 'p');
+ }
+ });
- author: {
- selectors: [['meta[name="author"]', 'value'], '.g-byline', '.byline']
- },
+ return $;
+}
- content: {
- selectors: ['div.g-blocks', 'article#story'],
+// Loop through the provided doc, and convert any p-like elements to
+// actual paragraph tags.
+//
+// Things fitting this criteria:
+// * Multiple consecutive <br /> tags.
+// * <div /> tags without block level elements inside of them
+// * <span /> tags who are not children of <p /> or <div /> tags.
+//
+// :param $: A cheerio object to search
+// :return cheerio object with new p elements
+// (By-reference mutation, though. Returned just for convenience.)
- defaultCleaner: false,
+function convertToParagraphs$$1($) {
+ $ = brsToPs$$1($);
+ $ = convertDivs($);
+ $ = convertSpans($);
- transforms: {
- 'img.g-lazy': function imgGLazy($node) {
- var src = $node.attr('src');
- // const widths = $node.attr('data-widths')
- // .slice(1)
- // .slice(0, -1)
- // .split(',');
- // if (widths.length) {
- // width = widths.slice(-1);
- // } else {
- // width = '900';
- // }
- var width = 640;
+ return $;
+}
- src = src.replace('{{size}}', width);
- $node.attr('src', src);
- }
- },
+function convertNodeTo$$1($node, $) {
+ var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
- clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden']
- },
+ var node = $node.get(0);
+ if (!node) {
+ return $;
+ }
+ var attrs = getAttrs(node) || {};
+ // console.log(attrs)
- date_published: null,
+ var attribString = _Reflect$ownKeys(attrs).map(function (key) {
+ return key + '=' + attrs[key];
+ }).join(' ');
+ var html = void 0;
- lead_image_url: null,
+ if ($.browser) {
+ // In the browser, the contents of noscript tags aren't rendered, therefore
+ // transforms on the noscript tag (commonly used for lazy-loading) don't work
+ // as expected. This test case handles that
+ html = node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();
+ } else {
+ html = $node.contents();
+ }
+ $node.replaceWith('<' + tag + ' ' + attribString + '>' + html + '' + tag + '>');
+ return $;
+}
- dek: null,
+function cleanForHeight($img, $) {
+ var height = parseInt($img.attr('height'), 10);
+ var width = parseInt($img.attr('width'), 10) || 20;
- next_page_url: null,
+ // Remove images that explicitly have very small heights or
+ // widths, because they are most likely shims or icons,
+ // which aren't very useful for reading.
+ if ((height || 20) < 10 || width < 10) {
+ $img.remove();
+ } else if (height) {
+ // Don't ever specify a height on images, so that we can
+ // scale with respect to width without screwing up the
+ // aspect ratio.
+ $img.removeAttr('height');
+ }
- excerpt: null
-};
+ return $;
+}
-// Rename CustomExtractor
-// to fit your publication
-var TheAtlanticExtractor = {
- domain: 'www.theatlantic.com',
- title: {
- selectors: ['h1.hed']
- },
-
- author: {
- selectors: ['article#article .article-cover-extra .metadata .byline a']
- },
-
- content: {
- selectors: ['.article-body'],
+// Cleans out images where the source string matches transparent/spacer/etc
+// TODO This seems very aggressive - AP
+function removeSpacers($img, $) {
+ if (SPACER_RE.test($img.attr('src'))) {
+ $img.remove();
+ }
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+ return $;
+}
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+function cleanImages($article, $) {
+ $article.find('img').each(function (index, img) {
+ var $img = $(img);
- date_published: null,
+ cleanForHeight($img, $);
+ removeSpacers($img, $);
+ });
- lead_image_url: null,
+ return $;
+}
- dek: null,
+function markToKeep(article, $, url) {
+ var tags = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : [];
- next_page_url: null,
+ if (tags.length === 0) {
+ tags = KEEP_SELECTORS;
+ }
- excerpt: null
-};
+ if (url) {
+ var _URL$parse = URL.parse(url),
+ protocol = _URL$parse.protocol,
+ hostname = _URL$parse.hostname;
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var NewYorkerExtractor = {
- domain: 'www.newyorker.com',
- title: {
- selectors: ['h1.title']
- },
+ tags = [].concat(_toConsumableArray(tags), ['iframe[src^="' + protocol + '//' + hostname + '"]']);
+ }
- author: {
- selectors: ['.contributors']
- },
+ $(tags.join(','), article).addClass(KEEP_CLASS);
- content: {
- selectors: ['div#articleBody', 'div.articleBody'],
+ return $;
+}
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+function stripJunkTags(article, $) {
+ var tags = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+ if (tags.length === 0) {
+ tags = STRIP_OUTPUT_TAGS;
+ }
- date_published: {
- selectors: [['meta[name="article:published_time"]', 'value']]
- },
+ // Remove matching elements, but ignore
+ // any element with a class of mercury-parser-keep
+ $(tags.join(','), article).not('.' + KEEP_CLASS).remove();
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+ // Remove the mercury-parser-keep class from result
+ $('.' + KEEP_CLASS, article).removeClass(KEEP_CLASS);
- dek: {
- selectors: [['meta[name="og:description"]', 'value']]
- },
+ return $;
+}
- next_page_url: null,
+// H1 tags are typically the article title, which should be extracted
+// by the title extractor instead. If there's less than 3 of them (<3),
+// strip them. Otherwise, turn 'em into H2s.
+function cleanHOnes$$1(article, $) {
+ var $hOnes = $('h1', article);
- excerpt: null
-};
+ if ($hOnes.length < 3) {
+ $hOnes.each(function (index, node) {
+ return $(node).remove();
+ });
+ } else {
+ $hOnes.each(function (index, node) {
+ convertNodeTo$$1($(node), $, 'h2');
+ });
+ }
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var WiredExtractor = {
- domain: 'www.wired.com',
- title: {
- selectors: ['h1.post-title']
- },
+ return $;
+}
- author: {
- selectors: ['a[rel="author"]']
- },
+function removeAllButWhitelist($article) {
+ $article.find('*').each(function (index, node) {
+ var attrs = getAttrs(node);
- content: {
- selectors: ['article.content'],
+ setAttrs(node, _Reflect$ownKeys(attrs).reduce(function (acc, attr) {
+ if (WHITELIST_ATTRS_RE.test(attr)) {
+ return _extends({}, acc, _defineProperty({}, attr, attrs[attr]));
+ }
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+ return acc;
+ }, {}));
+ });
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: ['.visually-hidden']
- },
+ return $article;
+}
- date_published: {
- selectors: [['meta[itemprop="datePublished"]', 'value']]
- },
+// function removeAttrs(article, $) {
+// REMOVE_ATTRS.forEach((attr) => {
+// $(`[${attr}]`, article).removeAttr(attr);
+// });
+// }
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+// Remove attributes like style or align
+function cleanAttributes$$1($article) {
+ // Grabbing the parent because at this point
+ // $article will be wrapped in a div which will
+ // have a score set on it.
+ return removeAllButWhitelist($article.parent().length ? $article.parent() : $article);
+}
- dek: {
- selectors: [['meta[name="og:description"]', 'value']]
- },
+function removeEmpty($article, $) {
+ $article.find('p').each(function (index, p) {
+ var $p = $(p);
+ if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();
+ });
- next_page_url: null,
+ return $;
+}
- excerpt: null
-};
+// // CONTENT FETCHING CONSTANTS ////
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var MSNExtractor = {
- domain: 'www.msn.com',
- title: {
- selectors: ['h1']
- },
+// A list of strings that can be considered unlikely candidates when
+// extracting content from a resource. These strings are joined together
+// and then tested for existence using re:test, so may contain simple,
+// non-pipe style regular expression queries if necessary.
+var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
+'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
+'presence_control_external', // lifehacker.com container full of false positives
+'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
- author: {
- selectors: ['span.authorname-txt']
- },
+// A list of strings that can be considered LIKELY candidates when
+// extracting content from a resource. Essentially, the inverse of the
+// blacklist above - if something matches both blacklist and whitelist,
+// it is kept. This is useful, for example, if something has a className
+// of "rss-content entry-content". It matched 'rss', so it would normally
+// be removed, however, it's also the entry content, so it should be left
+// alone.
+//
+// These strings are joined together and then tested for existence using
+// re:test, so may contain simple, non-pipe style regular expression queries
+// if necessary.
+var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
+'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
- content: {
- selectors: ['div.richtext'],
+// A list of tags which, if found inside, should cause a <div> to NOT
+// be turned into a paragraph tag. Shallow div tags without these elements
+// should be turned into <p> tags.
+var DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+// A list of tags that should be ignored when trying to find the top candidate
+// for a document.
+var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: ['span.caption']
- },
+var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');
- date_published: {
- selectors: ['span.time']
- },
+// A list of selectors that specify, very clearly, either hNews or other
+// very content-specific style content, like Blogger templates.
+// More examples here: http://microformats.org/wiki/blog-post-formats
+var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
- lead_image_url: {
- selectors: []
- },
+var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
+var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');
- dek: {
- selectors: [['meta[name="description"]', 'value']]
- },
+// A list of strings that denote a positive scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
+'\\Bcopy'];
- next_page_url: null,
+// The above list, joined into a matching regular expression
+var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');
- excerpt: null
-};
+// Readability publisher-specific guidelines
+var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var YahooExtractor = {
- domain: 'www.yahoo.com',
- title: {
- selectors: ['header.canvas-header']
- },
-
- author: {
- selectors: ['span.provider-name']
- },
+// A list of strings that denote a negative scoring for this content as being
+// an article container. Checked against className and id.
+//
+// TODO: Perhaps have these scale based on their odds of being quality?
+var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
+'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
+'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
+'promo', 'pr_', // autoblog - press release
+'related', 'respond', 'roundcontent', // lifehacker restricted content warning
+'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
+// The above list, joined into a matching regular expression
+var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');
- content: {
- selectors: [
- // enter content selectors
- '.content-canvas'],
+// Match a digit. Pretty clear.
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: ['.figure-caption']
- },
+// Match 2 or more consecutive <br> tags
- date_published: {
- selectors: [['time.date[datetime]', 'datetime']]
- },
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+// Match 1 BR tag.
- dek: {
- selectors: [['meta[name="og:description"]', 'value']]
- },
- next_page_url: null,
+// A list of all of the block level tags known in HTML5 and below. Taken from
+// http://bit.ly/qneNIT
- excerpt: null
-};
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var BuzzfeedExtractor = {
- domain: 'www.buzzfeed.com',
- title: {
- selectors: ['h1[id="post-title"]']
- },
- author: {
- selectors: ['a[data-action="user/username"]', 'byline__author']
- },
+// The removal is implemented as a blacklist and whitelist, this test finds
+// blacklisted elements that aren't whitelisted. We do this all in one
+// expression-both because it's only one pass, and because this skips the
+// serialization for whitelisted nodes.
+var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
- content: {
- selectors: ['#buzz_sub_buzz'],
- defaultCleaner: false,
+var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: {
- h2: 'b'
- },
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box']
- },
- date_published: {
- selectors: ['.buzz-datetime']
- },
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
+var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
+var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
- dek: {
- selectors: [['meta[name="description"]', 'value']]
- },
+// Get the score of a node based on its className and id.
+function getWeight(node) {
+ var classes = node.attr('class');
+ var id = node.attr('id');
+ var score = 0;
- next_page_url: null,
+ if (id) {
+ // if id exists, try to score on both positive and negative
+ if (POSITIVE_SCORE_RE$1.test(id)) {
+ score += 25;
+ }
+ if (NEGATIVE_SCORE_RE$1.test(id)) {
+ score -= 25;
+ }
+ }
- excerpt: null
-};
+ if (classes) {
+ if (score === 0) {
+ // if classes exist and id did not contribute to score
+ // try to score on both positive and negative
+ if (POSITIVE_SCORE_RE$1.test(classes)) {
+ score += 25;
+ }
+ if (NEGATIVE_SCORE_RE$1.test(classes)) {
+ score -= 25;
+ }
+ }
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var WikiaExtractor = {
- domain: 'fandom.wikia.com',
- title: {
- selectors: ['h1.entry-title']
- },
+ // even if score has been set by id, add score for
+ // possible photo matches
+ // "try to keep photos if we can"
+ if (PHOTO_HINTS_RE$1.test(classes)) {
+ score += 10;
+ }
- author: {
- selectors: ['.author vcard', '.fn']
- },
+ // add 25 if class matches entry-content-asset,
+ // a class apparently instructed for use in the
+ // Readability publisher guidelines
+ // https://www.readability.com/developers/guidelines
+ if (READABILITY_ASSET$1.test(classes)) {
+ score += 25;
+ }
+ }
- content: {
- selectors: ['.grid-content', '.entry-content'],
+ return score;
+}
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+// Returns the numeric value of the node's `score` attribute.
+// Returns null when the attribute is missing or does not parse, and
+// also when the stored score is 0 (the `|| null` treats 0 as unset).
+function getScore($node) {
+ return parseFloat($node.attr('score')) || null;
+}
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+// return 1 for every comma in text
+function scoreCommas(text) {
+ return (text.match(/,/g) || []).length;
+}
- date_published: {
- selectors: [['meta[name="article:published_time"]', 'value']]
- },
+var idkRe = new RegExp('^(p|pre)$', 'i');
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+function scoreLength(textLength) {
+ var tagName = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'p';
- dek: {
- selectors: [['meta[name="og:description"]', 'value']]
- },
+ var chunks = textLength / 50;
- next_page_url: null,
+ if (chunks > 0) {
+ var lengthBonus = void 0;
- excerpt: null
-};
+ // No idea why p or pre are being tamped down here
+ // but just following the source for now
+ // Not even sure why tagName is included here,
+ // since this is only being called from the context
+ // of scoreParagraph
+ if (idkRe.test(tagName)) {
+ lengthBonus = chunks - 2;
+ } else {
+ lengthBonus = chunks - 1.25;
+ }
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var LittleThingsExtractor = {
- domain: 'www.littlethings.com',
- title: {
- selectors: ['h1.post-title']
- },
+ return Math.min(Math.max(lengthBonus, 0), 3);
+ }
- author: {
- selectors: [['meta[name="author"]', 'value']]
- },
+ return 0;
+}
- content: {
- selectors: [
- // enter content selectors
- '.mainContentIntro', '.content-wrapper'],
+// Score a paragraph using various methods. Things like number of
+// commas, etc. Higher is better.
+function scoreParagraph$$1(node) {
+ var score = 1;
+ var text = node.text().trim();
+ var textLength = text.length;
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+ // If this paragraph is less than 25 characters, don't count it.
+ if (textLength < 25) {
+ return 0;
+ }
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+ // Add points for any commas within this paragraph
+ score += scoreCommas(text);
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+ // For every 50 characters in this paragraph, add another point. Up
+ // to 3 points.
+ score += scoreLength(textLength);
- next_page_url: null,
+ // Articles can end with short paragraphs when people are being clever
+ // but they can also end with short paragraphs setting up lists of junk
+ // that we strip. This negative tweaks junk setup paragraphs just below
+ // the cutoff threshold.
+ if (text.slice(-1) === ':') {
+ score -= 1;
+ }
- excerpt: null
-};
+ return score;
+}
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var PoliticoExtractor = {
- domain: 'www.politico.com',
- title: {
- selectors: [
- // enter title selectors
- ['meta[name="og:title"]', 'value']]
- },
+function setScore($node, $, score) {
+ $node.attr('score', score);
+ return $node;
+}
- author: {
- selectors: ['.story-main-content .byline .vcard']
- },
+function addScore$$1($node, $, amount) {
+ try {
+ var score = getOrInitScore$$1($node, $) + amount;
+ setScore($node, $, score);
+ } catch (e) {
+ // Ignoring; error occurs in scoreNode
+ }
- content: {
- selectors: [
- // enter content selectors
- '.story-main-content', '.content-group', '.story-core', '.story-text'],
+ return $node;
+}
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: [],
+// Adds 1/4 of a child's score to its parent
+function addToParent$$1(node, $, score) {
+ var parent = node.parent();
+ if (parent) {
+ addScore$$1(parent, $, score * 0.25);
+ }
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: ['figcaption']
- },
+ return node;
+}
- date_published: {
- selectors: [['.story-main-content .timestamp time[datetime]', 'datetime']]
- },
+// gets and returns the score if it exists
+// if not, initializes a score based on
+// the node's tag type
+function getOrInitScore$$1($node, $) {
+ var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;
- lead_image_url: {
- selectors: [
- // enter lead_image_url selectors
- ['meta[name="og:image"]', 'value']]
- },
+ var score = getScore($node);
- dek: {
- selectors: [['meta[name="description"]', 'value']]
- },
+ if (score) {
+ return score;
+ }
- next_page_url: null,
+ score = scoreNode$$1($node);
- excerpt: null
-};
+ if (weightNodes) {
+ score += getWeight($node);
+ }
-var DeadspinExtractor = {
- domain: 'deadspin.com',
+ addToParent$$1($node, $, score);
- supportedDomains: ['jezebel.com', 'lifehacker.com', 'kotaku.com', 'gizmodo.com', 'jalopnik.com', 'kinja.com'],
+ return score;
+}
- title: {
- selectors: ['h1.headline']
- },
+// Score an individual node. Has some smarts for paragraphs, otherwise
+// just scores based on tag.
+function scoreNode$$1($node) {
+ var _$node$get = $node.get(0),
+ tagName = _$node$get.tagName;
- author: {
- selectors: ['.author']
- },
+ // TODO: Consider ordering by most likely.
+ // E.g., if divs are a more common tag on a page,
+ // Could save doing that regex test on every node – AP
- content: {
- selectors: ['.post-content', '.entry-content'],
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: {
- 'iframe.lazyload[data-recommend-id^="youtube://"]': function iframeLazyloadDataRecommendIdYoutube($node) {
- var youtubeId = $node.attr('id').split('youtube-')[1];
- $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);
- }
- },
+ if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
+ return scoreParagraph$$1($node);
+ } else if (tagName.toLowerCase() === 'div') {
+ return 5;
+ } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {
+ return 3;
+ } else if (BAD_TAGS$1.test(tagName)) {
+ return -3;
+ } else if (tagName.toLowerCase() === 'th') {
+ return -5;
+ }
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+ return 0;
+}
- date_published: {
- selectors: [['time.updated[datetime]', 'datetime']]
- },
+function convertSpans$1($node, $) { // converts $node to a <div> when it is a <span>
+ if ($node.get(0)) { // nothing to do when the selection has no first element
+ var _$node$get = $node.get(0),
+ tagName = _$node$get.tagName;
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+ if (tagName.toLowerCase() === 'span') {
+ // convert spans to divs; compare case-insensitively like the other tagName checks in this file
+ convertNodeTo$$1($node, $, 'div');
+ }
+ }
+}
- dek: {
- selectors: [
- // enter selectors
- ]
- },
+function addScoreTo($node, $, score) {
+ if ($node) {
+ convertSpans$1($node, $);
+ addScore$$1($node, $, score);
+ }
+}
- next_page_url: {
- selectors: [
- // enter selectors
- ]
- },
+function scorePs($, weightNodes) {
+ $('p, pre').not('[score]').each(function (index, node) { // only p/pre nodes without a score attribute
+ // Store this paragraph's own score (initialising it if needed) before
+ // its raw tag/text score is added to the parent and grandparent.
+ var $node = $(node);
+ $node = setScore($node, $, getOrInitScore$$1($node, $, weightNodes));
- excerpt: {
- selectors: [
- // enter selectors
- ]
- }
-};
+ var $parent = $node.parent();
+ var rawScore = scoreNode$$1($node);
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var BroadwayWorldExtractor = {
- domain: 'www.broadwayworld.com',
- title: {
- selectors: ['h1.article-title']
- },
+ addScoreTo($parent, $, rawScore, weightNodes); // NOTE(review): addScoreTo declares ($node, $, score); the 4th argument is unused
+ if ($parent) {
+ // Add half of the individual content score to the
+ // grandparent
+ addScoreTo($parent.parent(), $, rawScore / 2, weightNodes); // NOTE(review): 4th argument unused here as well
+ }
+ });
- author: {
- selectors: ['span[itemprop=author]']
- },
+ return $;
+}
- content: {
- selectors: ['div[itemprop=articlebody]'],
+// score content. Parents get the full value of their children's
+// content score, grandparents half
+function scoreContent$$1($) {
+ var weightNodes = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: {},
+ // First, look for special hNews based selectors and give them a big
+ // boost, if they exist
+ HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {
+ var _ref2 = _slicedToArray(_ref, 2),
+ parentSelector = _ref2[0],
+ childSelector = _ref2[1];
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+ $(parentSelector + ' ' + childSelector).each(function (index, node) {
+ addScore$$1($(node).parent(parentSelector), $, 80);
+ });
+ });
- date_published: {
- selectors: [['meta[itemprop=datePublished]', 'value']]
- },
+ // Doubling this again
+ // Previous solution caused a bug
+ // in which parents weren't retaining
+ // scores. This is not ideal, and
+ // should be fixed.
+ scorePs($, weightNodes);
+ scorePs($, weightNodes);
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+ return $;
+}
- dek: {
- selectors: [['meta[name="og:description"]', 'value']]
- },
+var NORMALIZE_RE = /\s{2,}/g;
- next_page_url: {
- selectors: [
- // enter selectors
- ]
- },
+function normalizeSpaces(text) {
+ return text.replace(NORMALIZE_RE, ' ').trim(); // replace every NORMALIZE_RE match with one space, then trim both ends
+}
- excerpt: {
- selectors: [
- // enter selectors
- ]
+// Given a URL and a list of regular expressions, find the first
+// expression that matches the URL and return its capture group 1,
+// or null when no expression matches. Each expression in regexList
+// is expected to capture the string to be cleaned in group 1.
+// Only used for date_published currently.
+function extractFromUrl(url, regexList) {
+ var matchRe = regexList.find(function (re) {
+ return re.test(url);
+ });
+ if (matchRe) {
+ return matchRe.exec(url)[1];
 }
-};
-// Rename CustomExtractor
-// to fit your publication
-// (e.g., NYTimesExtractor)
-var ApartmentTherapyExtractor = {
- domain: 'www.apartmenttherapy.com',
- title: {
- selectors: ['h1.headline']
- },
-
- author: {
- selectors: ['.PostByline__name']
- },
+ return null;
+}
- content: {
- selectors: ['div.post__content'],
+// An expression that looks to try to find the page digit within a URL, if
+// it exists.
+// Matches:
+// page=1
+// pg=1
+// p=1
+// paging=12
+// pag=7
+// pagination/1
+// paging/88
+// pa/83
+// p/11
+//
+// Does not match:
+// page:2
+// (pg=102 does match, via "=102"; pageNumFromUrl then rejects values >= 100)
+var PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: {
- 'div[data-render-react-id="images/LazyPicture"]': function divDataRenderReactIdImagesLazyPicture($node, $) {
- var data = JSON.parse($node.attr('data-props'));
- var src = data.sources[0].src;
+var HAS_ALPHA_RE = /[a-z]/i;
- var $img = $('').attr('src', src);
- $node.replaceWith($img);
- }
- },
+var IS_ALPHA_RE = /^[a-z]+$/i;
+var IS_DIGIT_RE = /^[0-9]+$/i;
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
+function pageNumFromUrl(url) {
+ var matches = url.match(PAGE_IN_HREF_RE);
+ if (!matches) return null;
- date_published: {
- selectors: [['.PostByline__timestamp[datetime]', 'datetime']]
- },
+ var pageNum = parseInt(matches[6], 10);
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
+ // Return pageNum < 100, otherwise
+ // return null
+ return pageNum < 100 ? pageNum : null;
+}
- dek: {
- selectors: [['meta[name=description]', 'value']]
- },
+function removeAnchor(url) {
+ return url.split('#')[0].replace(/\/$/, ''); // keep the part before the first '#', then drop one trailing '/'
+}
- next_page_url: {
- selectors: [
- // enter selectors
- ]
- },
+function isGoodSegment(segment, index, firstSegmentHasLetters) {
+ var goodSegment = true; // a segment is kept unless one of the rules below sets this to false
- excerpt: {
- selectors: [
- // enter selectors
- ]
+ // If this is purely a number, and it's the first or second
+ // url_segment, it's probably a page number. Remove it.
+ if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {
+ goodSegment = true; // NOTE(review): no-op, the flag is already true, so nothing is removed here despite the comment above; confirm intent
 }
-};
-
-var MediumExtractor = {
- domain: 'medium.com',
-
- supportedDomains: ['trackchanges.postlight.com'],
-
- title: {
- selectors: ['h1']
- },
-
- author: {
- selectors: [['meta[name="author"]', 'value']]
- },
-
- content: {
- selectors: ['.section-content'],
-
- // Is there anything in the content you selected that needs transformed
- // before it's consumable content? E.g., unusual lazy loaded images
- transforms: {
- // Re-write lazy-loaded youtube videos
- iframe: function iframe($node) {
- var ytRe = /https:\/\/i.embed.ly\/.+url=https:\/\/i\.ytimg\.com\/vi\/(\w+)\//;
- var thumb = decodeURIComponent($node.attr('data-thumbnail'));
- if (ytRe.test(thumb)) {
- var _thumb$match = thumb.match(ytRe),
- _thumb$match2 = _slicedToArray(_thumb$match, 2),
- _ = _thumb$match2[0],
- youtubeId = _thumb$match2[1]; // eslint-disable-line
-
-
- $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);
- var $parent = $node.parents('figure');
- $parent.prepend($node.clone());
- $node.remove();
- }
- }
- },
-
- // Is there anything that is in the result that shouldn't be?
- // The clean selectors will remove anything that matches from
- // the result
- clean: []
- },
-
- date_published: {
- selectors: [['time[datetime]', 'datetime']]
- },
-
- lead_image_url: {
- selectors: [['meta[name="og:image"]', 'value']]
- },
-
- dek: {
- selectors: [
- // enter selectors
- ]
- },
-
- next_page_url: {
- selectors: [
- // enter selectors
- ]
- },
-
- excerpt: {
- selectors: [
- // enter selectors
- ]
+ // If this is the first url_segment and it's just "index",
+ // remove it
+ if (index === 0 && segment.toLowerCase() === 'index') {
+ goodSegment = false;
 }
-};
+ // If our first or second url_segment is smaller than 3 characters,
+ // and the first url_segment had no alphas, remove it.
+ if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
+ goodSegment = false;
+ }
+ return goodSegment;
+}
-var CustomExtractors = Object.freeze({
- BloggerExtractor: BloggerExtractor,
- NYMagExtractor: NYMagExtractor,
- WikipediaExtractor: WikipediaExtractor,
- TwitterExtractor: TwitterExtractor,
- NYTimesExtractor: NYTimesExtractor,
- TheAtlanticExtractor: TheAtlanticExtractor,
- NewYorkerExtractor: NewYorkerExtractor,
- WiredExtractor: WiredExtractor,
- MSNExtractor: MSNExtractor,
- YahooExtractor: YahooExtractor,
- BuzzfeedExtractor: BuzzfeedExtractor,
- WikiaExtractor: WikiaExtractor,
- LittleThingsExtractor: LittleThingsExtractor,
- PoliticoExtractor: PoliticoExtractor,
- DeadspinExtractor: DeadspinExtractor,
- BroadwayWorldExtractor: BroadwayWorldExtractor,
- ApartmentTherapyExtractor: ApartmentTherapyExtractor,
- MediumExtractor: MediumExtractor
-});
-
-var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
- var extractor = CustomExtractors[key];
- return _extends({}, acc, mergeSupportedDomains(extractor));
-}, {});
-
-// Spacer images to be removed
-var SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');
+// Take a URL, and return the article base of said URL. That is, no
+// pagination data exists in it. Useful for comparing to other links
+// that might have pagination data within them.
+function articleBaseUrl(url, parsed) {
+ // Build a base URL for the article: protocol and host, followed by
+ // the path segments that survive the cleaning below.
+ //
+ // :param url: URL string; only parsed here when `parsed` is falsy
+ // :param parsed: optional already-parsed URL object (protocol/host/path)
+ // :return: the rebuilt URL string
+ var parsedUrl = parsed || URL.parse(url);
+ var protocol = parsedUrl.protocol,
+ host = parsedUrl.host,
+ path = parsedUrl.path;
-// The class we will use to mark elements we want to keep
-// but would normally remove
-var KEEP_CLASS = 'mercury-parser-keep';
-var KEEP_SELECTORS = ['iframe[src^="https://www.youtube.com"]', 'iframe[src^="http://www.youtube.com"]', 'iframe[src^="https://player.vimeo"]', 'iframe[src^="http://player.vimeo"]'];
+ var firstSegmentHasLetters = false;
+ // Walk the segments last-to-first, so index 0 is the final segment
+ // of the path.
+ var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {
+ var segment = rawSegment;
-// A list of tags to strip from the output if we encounter them.
-var STRIP_OUTPUT_TAGS = ['title', 'script', 'noscript', 'link', 'style', 'hr', 'embed', 'iframe', 'object'];
+ // Split off anything that looks like a file type. Only the first two
+ // dot-separated parts are inspected: when the second part passes
+ // IS_ALPHA_RE, the segment is cut down to the first part.
+ if (segment.includes('.')) {
+ var _segment$split = segment.split('.'),
+ _segment$split2 = _slicedToArray(_segment$split, 2),
+ possibleSegment = _segment$split2[0],
+ fileExt = _segment$split2[1];
-// cleanAttributes
-var REMOVE_ATTRS = ['style', 'align'];
-var REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(function (selector) {
- return '[' + selector + ']';
-});
-var REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');
-var WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];
-var WHITELIST_ATTRS_RE = new RegExp('^(' + WHITELIST_ATTRS.join('|') + ')$', 'i');
+ if (IS_ALPHA_RE.test(fileExt)) {
+ segment = possibleSegment;
+ }
+ }
-// removeEmpty
-var REMOVE_EMPTY_TAGS = ['p'];
-var REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(function (tag) {
- return tag + ':empty';
-}).join(',');
+ // If our first or second segment (counting from the end of the
+ // path) has anything looking like a page number, remove it.
+ if (PAGE_IN_HREF_RE.test(segment) && index < 2) {
+ segment = segment.replace(PAGE_IN_HREF_RE, '');
+ }
-// cleanTags
-var CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');
+ // If we're on the first segment, check to see if we have any
+ // characters in it. The first segment is actually the last bit of
+ // the URL, and this will be helpful to determine if we're on a URL
+ // segment that looks like "/2/" for example.
+ if (index === 0) {
+ firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);
+ }
-// cleanHeaders
-var HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];
-var HEADER_TAG_LIST = HEADER_TAGS.join(',');
+ // If it's not marked for deletion, push it to cleaned_segments.
+ // isGoodSegment makes that call from the cleaned segment, its
+ // position, and whether the last path segment contained letters.
+ if (isGoodSegment(segment, index, firstSegmentHasLetters)) {
+ acc.push(segment);
+ }
-// // CONTENT FETCHING CONSTANTS ////
+ return acc;
+ }, []);
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot',
-// 'form', // This is too generic, has too many false positives
-'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'outbrain', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'taboola', 'tools'];
+ // The segments were collected back-to-front; restore path order
+ // before joining.
+ return protocol + '//' + host + cleanedSegments.reverse().join('/');
+}
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
-//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+// Given a string, return true if it appears to have an ending sentence
+// within it (a literal period followed by a space or by the end of the
+// string), false otherwise.
+//
+// The period is escaped on purpose: an unescaped `.` matches any
+// character, which made every non-empty string pass this check.
+var SENTENCE_END_RE = new RegExp('\\.( |$)');
+function hasSentenceEnd(text) {
+ return SENTENCE_END_RE.test(text);
+}
-// A list of tags which, if found inside, should cause a to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into tags.
-var DIV_TO_P_BLOCK_TAGS = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+// Return the first `words` whitespace-separated words of `content`
+// (10 unless a second argument is supplied), joined by single spaces.
+function excerptContent(content) {
+ var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
+ var tokens = content.trim().split(/\s+/);
+ var leading = tokens.slice(0, words);
+ return leading.join(' ');
+}
+// Now that we have a top_candidate, look through the siblings of
+// it to see if any of them are decently scored. If they are, they
+// may be split parts of the content (Like two divs, a preamble and
+// a body.) Example:
+// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
+//
+// :param $candidate: the top-scoring node
+// :param topScore: the score of $candidate
+// :param $: the DOM library function used to create and wrap nodes
+// :return: $candidate itself when it has no parent or when no sibling
+// was merged; otherwise a new div holding $candidate and every
+// sibling that qualified
+function mergeSiblings($candidate, topScore, $) {
+ if (!$candidate.parent().length) {
+ return $candidate;
+ }
+ var siblingScoreThreshold = Math.max(10, topScore * 0.25);
+ // This has to be a real element: a selection made from an empty
+ // string has nothing to append into, so merged siblings would be lost.
+ var wrappingDiv = $('<div></div>');
+ $candidate.parent().children().each(function (index, sibling) {
+ var $sibling = $(sibling);
+ // Ignore tags like BR, HR, etc
+ if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
+ return null;
+ }
-// A list of selectors that specify, very clearly, either hNews or other
-// very content-specific style content, like Blogger templates.
-// More examples here: http://microformats.org/wiki/blog-post-formats
+ // Siblings without a (truthy) score are never merged.
+ var siblingScore = getScore($sibling);
+ if (siblingScore) {
+ if ($sibling.get(0) === $candidate.get(0)) {
+ wrappingDiv.append($sibling);
+ } else {
+ var contentBonus = 0;
+ var density = linkDensity($sibling);
+ // If sibling has a very low link density,
+ // give it a small bonus
+ if (density < 0.05) {
+ contentBonus += 20;
+ }
+ // If sibling has a high link density,
+ // give it a penalty
+ if (density >= 0.5) {
+ contentBonus -= 20;
+ }
+ // If sibling node has the same class as
+ // candidate, give it a bonus
+ if ($sibling.attr('class') === $candidate.attr('class')) {
+ contentBonus += topScore * 0.2;
+ }
+ var newScore = siblingScore + contentBonus;
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
-'\\Bcopy'];
+ if (newScore >= siblingScoreThreshold) {
+ return wrappingDiv.append($sibling);
+ // tagName casing depends on the DOM implementation (a browser DOM
+ // reports HTML tag names in upper case), so compare in lower case.
+ } else if (sibling.tagName.toLowerCase() === 'p') {
+ var siblingContent = $sibling.text();
+ var siblingContentLength = textLength(siblingContent);
-// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');
+ // Low-scoring paragraphs are still kept when they are long with
+ // few links, or short, link-free and sentence-like.
+ if (siblingContentLength > 80 && density < 0.25) {
+ return wrappingDiv.append($sibling);
+ } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
+ return wrappingDiv.append($sibling);
+ }
+ }
+ }
+ }
-// Readability publisher-specific guidelines
+ return null;
+ });
+ // Nothing but the candidate was collected: hand back the candidate
+ // itself rather than a wrapper around it.
+ if (wrappingDiv.children().length === 1 && wrappingDiv.children().first().get(0) === $candidate.get(0)) {
+ return $candidate;
+ }
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
-'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
-'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
-'promo', 'pr_', // autoblog - press release
-'related', 'respond', 'roundcontent', // lifehacker restricted content warning
-'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
-// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');
+ return wrappingDiv;
+}
-// XPath to try to determine if a page is wordpress. Not always successful.
-var IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';
+// After we've calculated scores, loop through all of the possible
+// candidate nodes we found and find the one with the highest score.
+//
+// :param $: the DOM library function for the scored document
+// :return: the highest-scoring node after sibling merging (see
+// mergeSiblings); when no node scores above zero, the body element,
+// or the document's first element if there is no body
+function findTopCandidate$$1($) {
+ var $candidate = void 0;
+ var topScore = 0;
-// Match a digit. Pretty clear.
+ $('[score]').each(function (index, node) {
+ // Ignore tags like BR, HR, etc
+ if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
+ return;
+ }
+ var $node = $(node);
+ var score = getScore($node);
-// A list of words that, if found in link text or URLs, likely mean that
-// this link is not a next page link.
+ // Strictly greater: on a tie the earlier node stays the candidate.
+ if (score > topScore) {
+ topScore = score;
+ $candidate = $node;
+ }
+ });
+ // If we don't have a candidate, return the body
+ // or whatever the first element is
+ if (!$candidate) {
+ // A selection object is truthy even when it matched nothing, so the
+ // fallback has to be chosen by length rather than with `||`.
+ var $body = $('body');
+ return $body.length ? $body : $('*').first();
+ }
+ $candidate = mergeSiblings($candidate, topScore, $);
-// Match any phrase that looks like it could be page, or paging, or pagination
-var PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');
+ return $candidate;
+}
-// Match any link text/classname/id that looks like it could mean the next
-// page. Things like: next, continue, >, >>, » but not >|, »| as those can
-// mean last page.
-// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\|]|$)|»([^\|]|$))', 'i');
+// Scoring
+function removeUnlessContent($node, $, weight) {
+ // Remove $node from the document unless it looks like article
+ // content. The checks below only run when the node's text scores
+ // under 10 with scoreCommas; otherwise the node is left alone.
+ //
+ // :param $node: the node under consideration
+ // :param $: the DOM library function used for scoped lookups
+ // :param weight: the node's score, used to pick link-density limits
+ //
+ // Explicitly save entry-content-asset tags, which are
+ // noted as valuable in the Publisher guidelines. For now
+ // this works everywhere. We may want to consider making
+ // this less of a sure-thing later.
+ if ($node.hasClass('entry-content-asset')) {
+ return;
+ }
-// Match any link text/classname/id that looks like it is an end link: things
-// like "first", "last", "end", etc.
+ var content = normalizeSpaces($node.text());
+ if (scoreCommas(content) < 10) {
+ var pCount = $('p', $node).length;
+ var inputCount = $('input', $node).length;
-// Match any link text/classname/id that looks like it means the previous
-// page.
+ // Looks like a form, too many inputs.
+ if (inputCount > pCount / 3) {
+ $node.remove();
+ return;
+ }
+ var contentLength = content.length;
+ var imgCount = $('img', $node).length;
-// Match 2 or more consecutive tags
+ // Content is too short, and there are no images, so
+ // this is probably junk content.
+ if (contentLength < 25 && imgCount === 0) {
+ $node.remove();
+ return;
+ }
+ var density = linkDensity($node);
-// Match 1 BR tag.
+ // Too high of link density, is probably a menu or
+ // something similar.
+ // console.log(weight, density, contentLength)
+ if (weight < 25 && density > 0.2 && contentLength > 75) {
+ $node.remove();
+ return;
+ }
+ // Too high of a link density, despite the score being
+ // high.
+ if (weight >= 25 && density > 0.5) {
+ // Don't remove the node if it's a list and the
+ // previous sibling ends with a colon though. That
+ // means it's probably content.
+ var tagName = $node.get(0).tagName.toLowerCase();
+ var nodeIsList = tagName === 'ol' || tagName === 'ul';
+ if (nodeIsList) {
+ var previousNode = $node.prev();
+ if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
+ return;
+ }
+ }
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
-var BLOCK_LEVEL_TAGS = ['article', 'aside', 'blockquote', 'body', 'br', 'button', 'canvas', 'caption', 'col', 'colgroup', 'dd', 'div', 'dl', 'dt', 'embed', 'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'li', 'map', 'object', 'ol', 'output', 'p', 'pre', 'progress', 'section', 'table', 'tbody', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'ul', 'video'];
-var BLOCK_LEVEL_TAGS_RE = new RegExp('^(' + BLOCK_LEVEL_TAGS.join('|') + ')$', 'i');
+ $node.remove();
+ return;
+ }
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');
-var CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');
+ var scriptCount = $('script', $node).length;
-var candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');
-var CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');
+ // Too many script tags, not enough content.
+ if (scriptCount > 0 && contentLength < 150) {
+ $node.remove();
+ return;
+ }
+ }
+}
-function stripUnlikelyCandidates($) {
- // Loop through the provided document and remove any non-link nodes
- // that are unlikely candidates for article content.
- //
- // Links are ignored because there are very often links to content
- // that are identified as non-body-content, but may be inside
- // article-like content.
- //
- // :param $: a cheerio object to strip nodes from
- // :return $: the cleaned cheerio object
- $('*').not('a').each(function (index, node) {
+// Given an article, clean it of some superfluous content specified by
+// tags. Things like forms, ads, etc.
+//
+// The tags searched are the ones selected by CLEAN_CONDITIONALLY_TAGS,
+// looked up within $article.
+//
+// Returns $, the same DOM library function that was passed in.
+function cleanTags$$1($article, $) {
+ $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
 var $node = $(node);
- var classes = $node.attr('class');
- var id = $node.attr('id');
- if (!id && !classes) return;
+ // Use the node's existing score; when it has none (or a score of
+ // zero), compute one and store it on the node.
+ var weight = getScore($node);
+ if (!weight) {
+ weight = getOrInitScore$$1($node, $);
+ setScore($node, $, weight);
+ }
- var classAndId = (classes || '') + ' ' + (id || '');
- if (CANDIDATES_WHITELIST.test(classAndId)) {
- return;
- } else if (CANDIDATES_BLACKLIST.test(classAndId)) {
+ // drop node if its weight is < 0
+ if (weight < 0) {
 $node.remove();
+ } else {
+ // determine if node seems like content
+ removeUnlessContent($node, $, weight);
 }
 });
 return $;
 }
-// ## NOTES:
-// Another good candidate for refactoring/optimizing.
-// Very imperative code, I don't love it. - AP
+function cleanHeaders($article, $) {
+ // Remove headers (every tag in HEADER_TAG_LIST) under $article that
+ // are unlikely to belong to the article body.
+ //
+ // :param $article: the article node to search within
+ // :param $: the DOM library function used for lookups
+ // :param title (3rd argument, default ''): the article title; headers
+ // whose normalized text equals it are removed
+ // :return: $
+ var title = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : '';
-// Given cheerio object, convert consecutive tags into
-// tags instead.
-//
-// :param $: A cheerio object
+ $(HEADER_TAG_LIST, $article).each(function (index, header) {
+ var $header = $(header);
+ // Remove any headers that appear before all other p tags in the
+ // document. This probably means that it was part of the title, a
+ // subtitle or something else extraneous like a datestamp or byline,
+ // all of which should be handled by other metadata handling.
+ // (prevAll only looks at preceding siblings of the header.)
+ if ($($header, $article).prevAll('p').length === 0) {
+ return $header.remove();
+ }
-function brsToPs$$1($) {
- var collapsing = false;
- $('br').each(function (index, element) {
- var nextElement = $(element).next().get(0);
+ // Remove any headers that match the title exactly.
+ if (normalizeSpaces($(header).text()) === title) {
+ return $header.remove();
+ }
- if (nextElement && nextElement.tagName === 'br') {
- collapsing = true;
- $(element).remove();
- } else if (collapsing) {
- collapsing = false;
- // $(element).replaceWith('')
- paragraphize(element, $, true);
+ // If this header has a negative weight, it's probably junk.
+ // Get rid of it.
+ if (getWeight($(header)) < 0) {
+ return $header.remove();
 }

+ return $header;
 });
 return $;
 }
-// Given a node, turn it into a P if it is not already a P, and
-// make sure it conforms to the constraints of a P tag (I.E. does
-// not contain any other block tags.)
-//
-// If the node is a , it treats the following inline siblings
-// as if they were its children.
-//
-// :param node: The node to paragraphize; this is a raw node
-// :param $: The cheerio object to handle dom manipulation
-// :param br: Whether or not the passed node is a br
-
-function paragraphize(node, $) {
- var br = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false;
+// Convert the document's top-level html and body nodes to divs, which
+// avoids later complications with multiple body tags. Returns the DOM
+// library function produced by the second conversion.
+function rewriteTopLevel$$1(article, $) {
+ // `article` is deliberately not used as a lookup context: scoping
+ // is problematic when the node being converted is the
+ // top-level/root node - AP
+ var $withoutHtml = convertNodeTo$$1($('html'), $, 'div');
+ var $withoutBody = convertNodeTo$$1($withoutHtml('body'), $withoutHtml, 'div');
- var $node = $(node);
+ return $withoutBody;
+}
- if (br) {
- var sibling = node.nextSibling;
- var p = $('');
+// For every node under $content that carries the `attr` attribute,
+// resolve its value against rootUrl and write the absolute URL back.
+// Nodes whose attribute value is empty are left untouched.
+function absolutize($, rootUrl, attr, $content) {
+ var selector = '[' + attr + ']';
+ $(selector, $content).each(function (_, node) {
+ var url = getAttrs(node)[attr];
- // while the next node is text or not a block level element
- // append it to a new p node
- while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {
- var nextSibling = sibling.nextSibling;
- $(sibling).appendTo(p);
- sibling = nextSibling;
+ if (url) {
+ setAttr(node, attr, URL.resolve(rootUrl, url));
 }
+ });
+}
- $node.replaceWith(p);
- $node.remove();
- return $;
+// Rewrite every href and src under $content as an absolute URL
+// resolved against `url`. Returns $content.
+function makeLinksAbsolute$$1($content, $, url) {
+ var urlAttrs = ['href', 'src'];
+ for (var i = 0; i < urlAttrs.length; i += 1) {
+ absolutize($, url, urlAttrs[i], $content);
+ }
+
+ return $content;
+}
+
+// Length of `text` once it is trimmed and every run of whitespace is
+// collapsed to a single space.
+function textLength(text) {
+ var collapsed = text.trim().replace(/\s+/g, ' ');
+ return collapsed.length;
+}
+
+// Fraction of a node's text that sits inside links.
+// Takes a node, returns a float: link text length over total text
+// length, 1 when there is link text but no total text, 0 otherwise.
+function linkDensity($node) {
+ var linkLength = textLength($node.find('a').text());
+ var totalTextLength = textLength($node.text());
+
+ if (totalTextLength > 0) {
+ return linkLength / totalTextLength;
+ }
+ if (totalTextLength === 0 && linkLength > 0) {
+ return 1;
 }
- return $;
+ return 0;
 }
-function convertDivs($) {
- $('div').each(function (index, div) {
- var $div = $(div);
- var convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;
+// Given a list of meta tag names to search for, return the value of
+// the first one that has exactly one non-empty value in the document.
+//
+// :param $: the DOM library function used to query the document
+// :param metaNames: candidate meta names, tried in the order given
+// :param cachedNames: names known to be present; candidates missing
+// from this list are skipped without querying the document
+// :param cleanTags (4th argument, default true): pass the value through
+// stripTags before returning it
+// :return: the meta value, or null when no candidate qualifies
+function extractFromMeta$$1($, metaNames, cachedNames) {
+ var cleanTags$$1 = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
+
+ var foundNames = metaNames.filter(function (name) {
+ return cachedNames.indexOf(name) !== -1;
+ });
+
+ // Transpiler-generated for...of scaffolding: _loop is the loop body;
+ // when it returns an object, that object's `v` is this function's
+ // return value.
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
+
+ try {
+ var _loop = function _loop() {
+ var name = _step.value;
+
+ // Tags are matched on their `name` attribute and the result is
+ // read from their `value` attribute.
+ var type = 'name';
+ var value = 'value';
+
+ var nodes = $('meta[' + type + '="' + name + '"]');
+
+ // Collect the value of every matching node and remove empty
+ // values.
+ // NOTE(review): values are not de-duplicated here, so two tags
+ // with the same name and the same value still count as two.
+ var values = nodes.map(function (index, node) {
+ return $(node).attr(value);
+ }).toArray().filter(function (text) {
+ return text !== '';
+ });
+
+ // If we have more than one value for the same name, we have a
+ // conflict and can't trust any of them. Skip this name. If we have
+ // zero, that means our meta tags had no values. Skip this name
+ // also.
+ if (values.length === 1) {
+ var metaValue = void 0;
+ // Meta values that contain HTML should be stripped, as they
+ // weren't subject to cleaning previously.
+ if (cleanTags$$1) {
+ metaValue = stripTags(values[0], $);
+ } else {
+ metaValue = values[0];
+ }
+
+ return {
+ v: metaValue
+ };
+ }
+ };
+
+ for (var _iterator = _getIterator(foundNames), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var _ret = _loop();
- if (convertable) {
- convertNodeTo($div, $, 'p');
+ if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
 }
- });
-
- return $;
-}
-function convertSpans($) {
- $('span').each(function (index, span) {
- var $span = $(span);
- var convertable = $span.parents('p, div').length === 0;
- if (convertable) {
- convertNodeTo($span, $, 'p');
+ // If nothing is found, return null
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ // Close the iterator on early exit, then rethrow any error caught
+ // during iteration.
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
 }
- });
+ }
- return $;
+ return null;
 }
-// Loop through the provided doc, and convert any p-like elements to
-// actual paragraph tags.
-//
-// Things fitting this criteria:
-// * Multiple consecutive tags.
-// * tags without block level elements inside of them
-// * tags who are not children of or tags.
-//
-// :param $: A cheerio object to search
-// :return cheerio object with new p elements
-// (By-reference mutation, though. Returned just for convenience.)
-
-function convertToParagraphs$$1($) {
- $ = brsToPs$$1($);
- $ = convertDivs($);
- $ = convertSpans($);
+// A node is usable when it has at most maxChildren children and does
+// not look to be within a comment.
+function isGoodNode($node, maxChildren) {
+ // If it has a number of children, it's more likely a container
+ // element than a single value.
+ var isContainer = $node.children().length > maxChildren;
- return $;
+ return !isContainer && !withinComment$$1($node);
 }
-function convertNodeTo($node, $) {
- var tag = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 'p';
-
- var node = $node.get(0);
- if (!node) {
- return $;
- }
+// Given a list of selectors, find content that may
+// be extractable from the document. This is for flat
+// meta-information, like author, title, date published, etc.
+//
+// :param $: the DOM library function used to query the document
+// :param selectors: selectors to try, in the order given
+// :param maxChildren (3rd argument, default 1): a match with more
+// children than this is rejected by isGoodNode
+// :param textOnly (4th argument, default true): return the match's
+// text() when true, its html() when false
+// :return: the first non-empty content found, or null
+function extractFromSelectors$$1($, selectors) {
+ var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
+ var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
+ // Transpiler-generated for...of scaffolding.
+ var _iteratorNormalCompletion = true;
+ var _didIteratorError = false;
+ var _iteratorError = undefined;
- var _$node$get = $node.get(0),
- attribs = _$node$get.attribs;
+ try {
+ for (var _iterator = _getIterator(selectors), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
+ var selector = _step.value;
- var attribString = _Reflect$ownKeys(attribs).map(function (key) {
- return key + '=' + attribs[key];
- }).join(' ');
+ var nodes = $(selector);
- $node.replaceWith('<' + tag + ' ' + attribString + '>' + $node.contents() + '' + tag + '>');
- return $;
-}
+ // If we didn't get exactly one of this selector, this may be
+ // a list of articles or comments. Skip it.
+ if (nodes.length === 1) {
+ var $node = $(nodes[0]);
-function cleanForHeight($img, $) {
- var height = parseInt($img.attr('height'), 10);
- var width = parseInt($img.attr('width'), 10) || 20;
+ if (isGoodNode($node, maxChildren)) {
+ var content = void 0;
+ if (textOnly) {
+ content = $node.text();
+ } else {
+ content = $node.html();
+ }
- // Remove images that explicitly have very small heights or
- // widths, because they are most likely shims or icons,
- // which aren't very useful for reading.
- if ((height || 20) < 10 || width < 10) {
- $img.remove();
- } else if (height) {
- // Don't ever specify a height on images, so that we can
- // scale with respect to width without screwing up the
- // aspect ratio.
- $img.removeAttr('height');
+ // Empty content does not count as a hit; keep trying the
+ // remaining selectors.
+ if (content) {
+ return content;
+ }
+ }
+ }
+ }
+ } catch (err) {
+ _didIteratorError = true;
+ _iteratorError = err;
+ } finally {
+ // Close the iterator on early exit, then rethrow any error caught
+ // during iteration.
+ try {
+ if (!_iteratorNormalCompletion && _iterator.return) {
+ _iterator.return();
+ }
+ } finally {
+ if (_didIteratorError) {
+ throw _iteratorError;
+ }
+ }
 }
- return $;
+ return null;
 }
-// Cleans out images where the source string matches transparent/spacer/etc
-// TODO This seems very aggressive - AP
-function removeSpacers($img, $) {
- if (SPACER_RE.test($img.attr('src'))) {
- $img.remove();
- }
-
- return $;
+// Strips all tags from a string of text.
+//
+// :param text: the string to clean; may or may not contain markup
+// :param $: the DOM library function used to parse the markup
+// :return: the text content of `text`; when that comes out empty, the
+// original `text` is returned unchanged
+function stripTags(text, $) {
+ // Wrapping text in html element prevents errors when text
+ // has no html: without the wrapper the bare string would be
+ // interpreted by $ on its own instead of being parsed as markup.
+ var cleanText = $('<span>' + text + '</span>').text();
+ return cleanText === '' ? text : cleanText;
 }
-function cleanImages($article, $) {
- $article.find('img').each(function (index, img) {
- var $img = $(img);
+// True when any ancestor of $node has "comment" somewhere in its
+// class or id.
+function withinComment$$1($node) {
+ var ancestors = $node.parents().toArray();
+ return ancestors.some(function (ancestor) {
+ var ancestorAttrs = getAttrs(ancestor);
- cleanForHeight($img, $);
- removeSpacers($img, $);
+ var classAndId = ancestorAttrs.class + ' ' + ancestorAttrs.id;
+ return classAndId.includes('comment');
 });
- return $;
 }
-function markToKeep(article, $, url) {
- var tags = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : [];
+// Decide whether a node holds enough text to be returned as an article.
+// param: node (a cheerio node)
+// return: boolean - true when its trimmed text is at least 100 characters
- if (tags.length === 0) {
- tags = KEEP_SELECTORS;
- }
+function nodeIsSufficient($node) {
+ var trimmedText = $node.text().trim();
+ return trimmedText.length >= 100;
+}
- if (url) {
- var _URL$parse = URL.parse(url),
- protocol = _URL$parse.protocol,
- hostname = _URL$parse.hostname;
+// True when the document contains at least one match for IS_WP_SELECTOR.
+function isWordpress($) {
+ var generatorTags = $(IS_WP_SELECTOR);
+ return generatorTags.length > 0;
+}
- tags = [].concat(_toConsumableArray(tags), ['iframe[src^="' + protocol + '//' + hostname + '"]']);
- }
+function getAttrs(node) {
+ // Return the node's attributes as a name -> value object.
+ //
+ // Two node shapes are handled: nodes exposing an `attribs` object,
+ // which is returned as-is (not copied), and nodes exposing only an
+ // `attributes` collection, from which a new object is built.
+ // Returns undefined when the node has neither.
+ var attribs = node.attribs,
+ attributes = node.attributes;
- $(tags.join(','), article).addClass(KEEP_CLASS);
- return $;
-}
+ if (!attribs && attributes) {
+ var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
+ var attr = attributes[index];
-function stripJunkTags(article, $) {
- var tags = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : [];
+ // Skip keys that are not attribute entries, as well as attributes
+ // whose value is empty.
+ if (!attr.name || !attr.value) return acc;
- if (tags.length === 0) {
- tags = STRIP_OUTPUT_TAGS;
+ acc[attr.name] = attr.value;
+ return acc;
+ }, {});
+ return attrs;
 }
- // Remove matching elements, but ignore
- // any element with a class of mercury-parser-keep
- $(tags.join(','), article).not('.' + KEEP_CLASS).remove();
+ return attribs;
+}
- // Remove the mercury-parser-keep class from result
- $('.' + KEEP_CLASS, article).removeClass(KEEP_CLASS);
+// Set a single attribute on a node and return the node. Nodes with an
+// `attribs` object are written to directly; nodes with an `attributes`
+// collection go through setAttribute; anything else is left untouched.
+function setAttr(node, attr, val) {
+ if (node.attribs) {
+ node.attribs[attr] = val;
+ return node;
+ }
+ if (node.attributes) {
+ node.setAttribute(attr, val);
+ }
- return $;
+ return node;
 }
-// H1 tags are typically the article title, which should be extracted
-// by the title extractor instead. If there's less than 3 of them (<3),
-// strip them. Otherwise, turn 'em into H2s.
-
-function cleanHOnes$$1(article, $) {
- var $hOnes = $('h1', article);
+function setAttrs(node, attrs) {
+ // Replace all of a node's attributes with `attrs` (a name -> value
+ // object) and return the node.
+ if (node.attribs) {
+ // The node keeps a reference to `attrs` itself, not a copy.
+ node.attribs = attrs;
+ } else if (node.attributes) {
+ // Remove every existing attribute first, always taking the first
+ // remaining one until the collection is empty.
+ while (node.attributes.length > 0) {
+ node.removeAttribute(node.attributes[0].name);
+ }
- if ($hOnes.length < 3) {
- $hOnes.each(function (index, node) {
- return $(node).remove();
- });
- } else {
- $hOnes.each(function (index, node) {
- convertNodeTo($(node), $, 'h2');
+ _Reflect$ownKeys(attrs).forEach(function (key) {
+ node.setAttribute(key, attrs[key]);
 });
 }
- return $;
+ return node;
 }
-function removeAllButWhitelist($article) {
- $article.find('*').each(function (index, node) {
- node.attribs = _Reflect$ownKeys(node.attribs).reduce(function (acc, attr) {
- if (WHITELIST_ATTRS_RE.test(attr)) {
- return _extends({}, acc, _defineProperty({}, attr, node.attribs[attr]));
- }
+// DOM manipulation
- return acc;
- }, {});
+// A value counts as a link when it contains an http:// or https://
+// scheme (case-insensitive).
+var IS_LINK = new RegExp('https?://', 'i');
+// A value counts as an image when it contains a .png, .gif, .jpg or
+// .jpeg extension (case-insensitive). The dot is escaped so it only
+// matches a literal period; unescaped, it matched any character.
+var IS_IMAGE = new RegExp('\\.(png|gif|jpe?g)', 'i');
+
+// Selector for the tags that are removed outright during cleaning.
+var TAGS_TO_REMOVE = ['script', 'style', 'form'].join(',');
+
+// Turn potentially lazy-loaded images into normal images.
+// Many sites have img tags with no source, or with a src attribute
+// that is only a placeholder, and keep the real image URL in some
+// other attribute. Any attribute other than src whose value looks like
+// both a link and an image is copied into src, so the image is no
+// longer lazy loaded. Returns $.
+function convertLazyLoadedImages($) {
+ $('img').each(function (_, img) {
+ var attrs = getAttrs(img);
+
+ _Reflect$ownKeys(attrs).forEach(function (attr) {
+ if (attr === 'src') {
+ return;
+ }
+
+ var value = attrs[attr];
+ if (IS_LINK.test(value) && IS_IMAGE.test(value)) {
+ $(img).attr('src', value);
+ }
+ });
 });
- return $article;
+ return $;
 }
-// function removeAttrs(article, $) {
-// REMOVE_ATTRS.forEach((attr) => {
-// $(`[${attr}]`, article).removeAttr(attr);
-// });
-// }
-
-// Remove attributes like style or align
-function cleanAttributes($article) {
- // Grabbing the parent because at this point
- // $article will be wrapped in a div which will
- // have a score set on it.
- return removeAllButWhitelist($article.parent().length ? $article.parent() : $article);
+// Filter predicate: true when `node` is a comment node. `index` is
+// accepted only because filter callbacks receive it first.
+function isComment(index, node) {
+ var nodeType = node.type;
+ return nodeType === 'comment';
 }
-function removeEmpty($article, $) {
- $article.find('p').each(function (index, p) {
- var $p = $(p);
- if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();
- });
+// Remove every comment node found among the contents of the
+// document's elements. Returns $.
+function cleanComments($) {
+ var $contents = $.root().find('*').contents();
+ $contents.filter(isComment).remove();
+
+ return $;
+}
+
+// Remove every element matched by TAGS_TO_REMOVE from the document,
+// then strip comment nodes via cleanComments. Returns $.
+function clean($) {
+ $(TAGS_TO_REMOVE).remove();
+ $ = cleanComments($);
 return $;
 }
-// // CONTENT FETCHING CONSTANTS ////
+var Resource = {
-// A list of strings that can be considered unlikely candidates when
-// extracting content from a resource. These strings are joined together
-// and then tested for existence using re:test, so may contain simple,
-// non-pipe style regular expression queries if necessary.
-var UNLIKELY_CANDIDATES_BLACKLIST$1 = ['ad-break', 'adbox', 'advert', 'addthis', 'agegate', 'aux', 'blogger-labels', 'combx', 'comment', 'conversation', 'disqus', 'entry-unrelated', 'extra', 'foot', 'form', 'header', 'hidden', 'loader', 'login', // Note: This can hit 'blogindex'.
-'menu', 'meta', 'nav', 'pager', 'pagination', 'predicta', // readwriteweb inline ad box
-'presence_control_external', // lifehacker.com container full of false positives
-'popup', 'printfriendly', 'related', 'remove', 'remark', 'rss', 'share', 'shoutbox', 'sidebar', 'sociable', 'sponsor', 'tools'];
+ // Create a Resource.
+ //
+ // :param url: The URL for the document we should retrieve.
+ // :param response: If set, use as the response rather than
+ // attempting to fetch it ourselves. Expects a
+ // string.
+ create: function create(url, preparedResponse, parsedUrl) {
+ var _this = this;
-// A list of strings that can be considered LIKELY candidates when
-// extracting content from a resource. Essentially, the inverse of the
-// blacklist above - if something matches both blacklist and whitelist,
-// it is kept. This is useful, for example, if something has a className
-// of "rss-content entry-content". It matched 'rss', so it would normally
-// be removed, however, it's also the entry content, so it should be left
-// alone.
-//
-// These strings are joined together and then tested for existence using
-// re:test, so may contain simple, non-pipe style regular expression queries
-// if necessary.
-var UNLIKELY_CANDIDATES_WHITELIST$1 = ['and', 'article', 'body', 'blogindex', 'column', 'content', 'entry-content-asset', 'format', // misuse of form
-'hfeed', 'hentry', 'hatom', 'main', 'page', 'posts', 'shadow'];
+ return _asyncToGenerator(_regeneratorRuntime.mark(function _callee() {
+ var result, validResponse;
+ return _regeneratorRuntime.wrap(function _callee$(_context) {
+ while (1) {
+ switch (_context.prev = _context.next) {
+ case 0:
+ result = void 0;
-// A list of tags which, if found inside, should cause a to NOT
-// be turned into a paragraph tag. Shallow div tags without these elements
-// should be turned into tags.
-var DIV_TO_P_BLOCK_TAGS$1 = ['a', 'blockquote', 'dl', 'div', 'img', 'p', 'pre', 'table'].join(',');
+ if (!preparedResponse) {
+ _context.next = 6;
+ break;
+ }
-// A list of tags that should be ignored when trying to find the top candidate
-// for a document.
-var NON_TOP_CANDIDATE_TAGS$1 = ['br', 'b', 'i', 'label', 'hr', 'area', 'base', 'basefont', 'input', 'img', 'link', 'meta'];
+ validResponse = {
+ statusMessage: 'OK',
+ statusCode: 200,
+ headers: {
+ 'content-type': 'text/html',
+ 'content-length': 500
+ }
+ };
-var NON_TOP_CANDIDATE_TAGS_RE$1 = new RegExp('^(' + NON_TOP_CANDIDATE_TAGS$1.join('|') + ')$', 'i');
-// A list of selectors that specify, very clearly, either hNews or other
-// very content-specific style content, like Blogger templates.
-// More examples here: http://microformats.org/wiki/blog-post-formats
-var HNEWS_CONTENT_SELECTORS$1 = [['.hentry', '.entry-content'], ['entry', '.entry-content'], ['.entry', '.entry_content'], ['.post', '.postbody'], ['.post', '.post_body'], ['.post', '.post-body']];
+ result = { body: preparedResponse, response: validResponse };
+ _context.next = 9;
+ break;
-var PHOTO_HINTS$1 = ['figure', 'photo', 'image', 'caption'];
-var PHOTO_HINTS_RE$1 = new RegExp(PHOTO_HINTS$1.join('|'), 'i');
+ case 6:
+ _context.next = 8;
+ return fetchResource$1(url, parsedUrl);
-// A list of strings that denote a positive scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var POSITIVE_SCORE_HINTS$1 = ['article', 'articlecontent', 'instapaper_body', 'blog', 'body', 'content', 'entry-content-asset', 'entry', 'hentry', 'main', 'Normal', 'page', 'pagination', 'permalink', 'post', 'story', 'text', '[-_]copy', // usatoday
-'\\Bcopy'];
+ case 8:
+ result = _context.sent;
-// The above list, joined into a matching regular expression
-var POSITIVE_SCORE_RE$1 = new RegExp(POSITIVE_SCORE_HINTS$1.join('|'), 'i');
+ case 9:
+ if (!result.error) {
+ _context.next = 12;
+ break;
+ }
-// Readability publisher-specific guidelines
-var READABILITY_ASSET$1 = new RegExp('entry-content-asset', 'i');
+ result.failed = true;
+ return _context.abrupt('return', result);
-// A list of strings that denote a negative scoring for this content as being
-// an article container. Checked against className and id.
-//
-// TODO: Perhaps have these scale based on their odds of being quality?
-var NEGATIVE_SCORE_HINTS$1 = ['adbox', 'advert', 'author', 'bio', 'bookmark', 'bottom', 'byline', 'clear', 'com-', 'combx', 'comment', 'comment\\B', 'contact', 'copy', 'credit', 'crumb', 'date', 'deck', 'excerpt', 'featured', // tnr.com has a featured_content which throws us off
-'foot', 'footer', 'footnote', 'graf', 'head', 'info', 'infotext', // newscientist.com copyright
-'instapaper_ignore', 'jump', 'linebreak', 'link', 'masthead', 'media', 'meta', 'modal', 'outbrain', // slate.com junk
-'promo', 'pr_', // autoblog - press release
-'related', 'respond', 'roundcontent', // lifehacker restricted content warning
-'scroll', 'secondary', 'share', 'shopping', 'shoutbox', 'side', 'sidebar', 'sponsor', 'stamp', 'sub', 'summary', 'tags', 'tools', 'widget'];
-// The above list, joined into a matching regular expression
-var NEGATIVE_SCORE_RE$1 = new RegExp(NEGATIVE_SCORE_HINTS$1.join('|'), 'i');
+ case 12:
+ return _context.abrupt('return', _this.generateDoc(result));
-// Match a digit. Pretty clear.
+ case 13:
+ case 'end':
+ return _context.stop();
+ }
+ }
+ }, _callee, _this);
+ }))();
+ },
+ // Turn a fetch result into a cleaned cheerio document.
+ // _ref.body is passed to cheerio.load and _ref.response.headers is
+ // consulted for 'content-type'. Throws an Error if the content type
+ // mentions neither 'html' nor 'text', or if the parsed root has no
+ // children; otherwise returns the document after normalizeMetaTags,
+ // convertLazyLoadedImages and clean have run, in that order.
+ generateDoc: function generateDoc(_ref) {
+ var content = _ref.body,
+ response = _ref.response;
+ // A response without a content-type header would otherwise crash on
+ // `.includes` with a TypeError; treat it as an empty type so it is
+ // rejected by the explicit check below.
+ var contentType = response.headers['content-type'] || '';
+ // TODO: Implement is_text function from
+ // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57
-// Match 2 or more consecutive tags
+ if (!contentType.includes('html') && !contentType.includes('text')) {
+ throw new Error('Content does not appear to be text.');
+ }
+ var $ = cheerio.load(content, { normalizeWhitespace: true });
-// Match 1 BR tag.
+ // An empty root means cheerio produced nothing usable from the body.
+ if ($.root().children().length === 0) {
+ throw new Error('No children, likely a bad parse.');
+ }
+ $ = normalizeMetaTags($);
+ $ = convertLazyLoadedImages($);
+ $ = clean($);
-// A list of all of the block level tags known in HTML5 and below. Taken from
-// http://bit.ly/qneNIT
+ return $;
+ }
+};
+// Build a plain object that maps each name in `domains` to the same
+// `extractor` reference; an empty list yields an empty object.
+var merge = function merge(extractor, domains) {
+ var byDomain = {};
+ domains.forEach(function (name) {
+ byDomain[name] = extractor;
+ });
+ return byDomain;
+};
+// Map an extractor's primary domain, plus every entry of its optional
+// `supportedDomains` list, to that extractor by delegating to merge.
+function mergeSupportedDomains(extractor) {
+ var domains = [extractor.domain];
+ if (extractor.supportedDomains) {
+ domains = domains.concat(_toConsumableArray(extractor.supportedDomains));
+ }
+ return merge(extractor, domains);
+}
-// The removal is implemented as a blacklist and whitelist, this test finds
-// blacklisted elements that aren't whitelisted. We do this all in one
-// expression-both because it's only one pass, and because this skips the
-// serialization for whitelisted nodes.
-var candidatesBlacklist$1 = UNLIKELY_CANDIDATES_BLACKLIST$1.join('|');
+var BloggerExtractor = {
+ domain: 'blogspot.com',
+ content: {
+ // Blogger is insane and does not load its content
+ // initially in the page, but it's all there
+ // in noscript
+ selectors: ['.post-content noscript'],
+ // Selectors to remove from the extracted content
+ clean: [],
-var candidatesWhitelist$1 = UNLIKELY_CANDIDATES_WHITELIST$1.join('|');
+ // Convert the noscript tag to a div
+ transforms: {
+ noscript: 'div'
+ }
+ },
+ author: {
+ selectors: ['.post-author-name']
+ },
+ title: {
+ selectors: ['.post h2.title']
+ },
+ date_published: {
+ selectors: ['span.publishdate']
+ }
+};
-var PARAGRAPH_SCORE_TAGS$1 = new RegExp('^(p|li|span|pre)$', 'i');
-var CHILD_CONTENT_TAGS$1 = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');
-var BAD_TAGS$1 = new RegExp('^(address|form)$', 'i');
+var NYMagExtractor = {
+ domain: 'nymag.com',
+ content: {
+ // Order by most likely. Extractor will stop on first occurrence
+ selectors: ['div.article-content', 'section.body', 'article.article'],
-// Get the score of a node based on its className and id.
-function getWeight(node) {
- var classes = node.attr('class');
- var id = node.attr('id');
- var score = 0;
+ // Selectors to remove from the extracted content
+ clean: ['.ad', '.single-related-story'],
- if (id) {
- // if id exists, try to score on both positive and negative
- if (POSITIVE_SCORE_RE$1.test(id)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$1.test(id)) {
- score -= 25;
- }
- }
+ // Object of transformations to make on matched elements.
+ // Each key is the selector, each value is the tag to
+ // transform to.
+ // If a function is given, it should return a string
+ // to convert to or nothing (in which case it will not perform
+ // the transformation).
+ transforms: {
+ // Convert h1s to h2s
+ h1: 'h2',
- if (classes) {
- if (score === 0) {
- // if classes exist and id did not contribute to score
- // try to score on both positive and negative
- if (POSITIVE_SCORE_RE$1.test(classes)) {
- score += 25;
- }
- if (NEGATIVE_SCORE_RE$1.test(classes)) {
- score -= 25;
+ // Convert lazy-loaded noscript images to figures
+ noscript: function noscript($node, $) {
+ var $children = $.browser ? $($node.text()) : $node.children();
+ if ($children.length === 1 && $children.get(0) !== undefined && $children.get(0).tagName.toLowerCase() === 'img') {
+ return 'figure';
+ }
+
+ return null;
}
}
+ },
+
+ title: {
+ selectors: ['h1.lede-feature-title', 'h1.headline-primary', 'h1']
+ },
+
+ author: {
+ selectors: ['.by-authors', '.lede-feature-author']
+ },
+
+ dek: {
+ selectors: ['.lede-feature-teaser']
+ },
+
+ date_published: {
+ selectors: [['time.article-timestamp[datetime]', 'datetime'], 'time.article-timestamp']
+ }
+};
+
+var WikipediaExtractor = {
+ domain: 'wikipedia.org',
+ content: {
+ selectors: ['#mw-content-text'],
+
+ defaultCleaner: false,
+
+ // transform top infobox to an image with caption
+ transforms: {
+ '.infobox img': function infoboxImg($node) {
+ var $parent = $node.parents('.infobox');
+ // Only prepend the first image in .infobox
+ if ($parent.children('img').length === 0) {
+ $parent.prepend($node);
+ }
+ },
+ '.infobox caption': 'figcaption',
+ '.infobox': 'figure'
+ },
- // even if score has been set by id, add score for
- // possible photo matches
- // "try to keep photos if we can"
- if (PHOTO_HINTS_RE$1.test(classes)) {
- score += 10;
- }
+ // Selectors to remove from the extracted content
+ clean: ['.mw-editsection', 'figure tr, figure td, figure tbody', '#toc', '.navbox']
- // add 25 if class matches entry-content-asset,
- // a class apparently instructed for use in the
- // Readability publisher guidelines
- // https://www.readability.com/developers/guidelines
- if (READABILITY_ASSET$1.test(classes)) {
- score += 25;
- }
- }
+ },
- return score;
-}
+ author: 'Wikipedia Contributors',
-// returns the score of a node based on
-// the node's score attribute
-// returns null if no score set
-function getScore($node) {
- return parseFloat($node.attr('score')) || null;
-}
+ title: {
+ selectors: ['h2.title']
+ },
-// return 1 for every comma in text
-function scoreCommas(text) {
- return (text.match(/,/g) || []).length;
-}
+ date_published: {
+ selectors: ['#footer-info-lastmod']
+ }
-var idkRe = new RegExp('^(p|pre)$', 'i');
+};
-function scoreLength(textLength) {
- var tagName = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 'p';
+var TwitterExtractor = {
+ domain: 'twitter.com',
- var chunks = textLength / 50;
+ content: {
+ transforms: {
+ // We're transforming essentially the whole page here.
+ // Twitter doesn't have nice selectors, so our initial
+ // selector grabs the whole page, then we're re-writing
+ // it to fit our needs before we clean it up.
+ '.permalink[role=main]': function permalinkRoleMain($node, $) {
+ var tweets = $node.find('.tweet');
+ var $tweetContainer = $('');
+ $tweetContainer.append(tweets);
+ $node.replaceWith($tweetContainer);
+ },
- if (chunks > 0) {
- var lengthBonus = void 0;
+ // Twitter wraps @ with <s> tags, which
+ // render as a strikethrough
+ s: 'span'
+ },
- // No idea why p or pre are being tamped down here
- // but just following the source for now
- // Not even sure why tagName is included here,
- // since this is only being called from the context
- // of scoreParagraph
- if (idkRe.test(tagName)) {
- lengthBonus = chunks - 2;
- } else {
- lengthBonus = chunks - 1.25;
- }
+ selectors: ['.permalink[role=main]'],
- return Math.min(Math.max(lengthBonus, 0), 3);
- }
+ defaultCleaner: false,
- return 0;
-}
+ clean: ['.stream-item-footer', 'button', '.tweet-details-fixer']
+ },
-// Score a paragraph using various methods. Things like number of
-// commas, etc. Higher is better.
-function scoreParagraph$$1(node) {
- var score = 1;
- var text = node.text().trim();
- var textLength = text.length;
+ author: {
+ selectors: ['.tweet.permalink-tweet .username']
+ },
- // If this paragraph is less than 25 characters, don't count it.
- if (textLength < 25) {
- return 0;
+ date_published: {
+ selectors: [['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms']]
}
- // Add points for any commas within this paragraph
- score += scoreCommas(text);
+};
- // For every 50 characters in this paragraph, add another point. Up
- // to 3 points.
- score += scoreLength(textLength);
+var NYTimesExtractor = {
+ domain: 'www.nytimes.com',
- // Articles can end with short paragraphs when people are being clever
- // but they can also end with short paragraphs setting up lists of junk
- // that we strip. This negative tweaks junk setup paragraphs just below
- // the cutoff threshold.
- if (text.slice(-1) === ':') {
- score -= 1;
- }
+ title: {
+ selectors: ['.g-headline', 'h1.headline']
+ },
- return score;
-}
+ author: {
+ selectors: [['meta[name="author"]', 'value'], '.g-byline', '.byline']
+ },
-function setScore($node, $, score) {
- $node.attr('score', score);
- return $node;
-}
+ content: {
+ selectors: ['div.g-blocks', 'article#story'],
-function addScore$$1($node, $, amount) {
- try {
- var score = getOrInitScore$$1($node, $) + amount;
- setScore($node, $, score);
- } catch (e) {
- // Ignoring; error occurs in scoreNode
- }
+ transforms: {
+ 'img.g-lazy': function imgGLazy($node) {
+ var src = $node.attr('src');
+ // const widths = $node.attr('data-widths')
+ // .slice(1)
+ // .slice(0, -1)
+ // .split(',');
+ // if (widths.length) {
+ // width = widths.slice(-1);
+ // } else {
+ // width = '900';
+ // }
+ var width = 640;
- return $node;
-}
+ src = src.replace('{{size}}', width);
+ $node.attr('src', src);
+ }
+ },
-// Adds 1/4 of a child's score to its parent
-function addToParent$$1(node, $, score) {
- var parent = node.parent();
- if (parent) {
- addScore$$1(parent, $, score * 0.25);
- }
+ clean: ['.ad', 'header#story-header', '.story-body-1 .lede.video', '.visually-hidden', '#newsletter-promo', '.promo', '.comments-button', '.hidden', '.comments']
+ },
- return node;
-}
+ date_published: null,
-// gets and returns the score if it exists
-// if not, initializes a score based on
-// the node's tag type
-function getOrInitScore$$1($node, $) {
- var weightNodes = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : true;
+ lead_image_url: null,
- var score = getScore($node);
+ dek: null,
- if (score) {
- return score;
- }
+ next_page_url: null,
- score = scoreNode$$1($node);
+ excerpt: null
+};
- if (weightNodes) {
- score += getWeight($node);
- }
+// Rename CustomExtractor
+// to fit your publication
+var TheAtlanticExtractor = {
+ domain: 'www.theatlantic.com',
+ title: {
+ selectors: ['h1.hed']
+ },
- addToParent$$1($node, $, score);
+ author: {
+ selectors: ['article#article .article-cover-extra .metadata .byline a']
+ },
- return score;
-}
+ content: {
+ selectors: ['.article-body'],
-// Score an individual node. Has some smarts for paragraphs, otherwise
-// just scores based on tag.
-function scoreNode$$1($node) {
- var _$node$get = $node.get(0),
- tagName = _$node$get.tagName;
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- // TODO: Consider ordering by most likely.
- // E.g., if divs are a more common tag on a page,
- // Could save doing that regex test on every node – AP
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
+ date_published: {
+ selectors: [['time[itemProp="datePublished"]', 'datetime']]
+ },
- if (PARAGRAPH_SCORE_TAGS$1.test(tagName)) {
- return scoreParagraph$$1($node);
- } else if (tagName === 'div') {
- return 5;
- } else if (CHILD_CONTENT_TAGS$1.test(tagName)) {
- return 3;
- } else if (BAD_TAGS$1.test(tagName)) {
- return -3;
- } else if (tagName === 'th') {
- return -5;
- }
+ lead_image_url: null,
- return 0;
-}
+ dek: null,
-function convertSpans$1($node, $) {
- if ($node.get(0)) {
- var _$node$get = $node.get(0),
- tagName = _$node$get.tagName;
+ next_page_url: null,
- if (tagName === 'span') {
- // convert spans to divs
- convertNodeTo($node, $, 'div');
- }
- }
-}
+ excerpt: null
+};
-function addScoreTo($node, $, score) {
- if ($node) {
- convertSpans$1($node, $);
- addScore$$1($node, $, score);
- }
-}
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var NewYorkerExtractor = {
+ domain: 'www.newyorker.com',
+ title: {
+ selectors: ['h1.title']
+ },
+
+ author: {
+ selectors: ['.contributors']
+ },
-function scorePs($, weightNodes) {
- $('p, pre').not('[score]').each(function (index, node) {
- // The raw score for this paragraph, before we add any parent/child
- // scores.
- var $node = $(node);
- $node = setScore($node, $, getOrInitScore$$1($node, $, weightNodes));
+ content: {
+ selectors: ['div#articleBody', 'div.articleBody'],
- var $parent = $node.parent();
- var rawScore = scoreNode$$1($node);
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- addScoreTo($parent, $, rawScore, weightNodes);
- if ($parent) {
- // Add half of the individual content score to the
- // grandparent
- addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);
- }
- });
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
- return $;
-}
+ date_published: {
+ selectors: [['meta[name="article:published_time"]', 'value']]
+ },
-// score content. Parents get the full value of their children's
-// content score, grandparents half
-function scoreContent$$1($) {
- var weightNodes = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : true;
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- // First, look for special hNews based selectors and give them a big
- // boost, if they exist
- HNEWS_CONTENT_SELECTORS$1.forEach(function (_ref) {
- var _ref2 = _slicedToArray(_ref, 2),
- parentSelector = _ref2[0],
- childSelector = _ref2[1];
+ dek: {
+ selectors: [['meta[name="og:description"]', 'value']]
+ },
- $(parentSelector + ' ' + childSelector).each(function (index, node) {
- addScore$$1($(node).parent(parentSelector), $, 80);
- });
- });
+ next_page_url: null,
- // Doubling this again
- // Previous solution caused a bug
- // in which parents weren't retaining
- // scores. This is not ideal, and
- // should be fixed.
- scorePs($, weightNodes);
- scorePs($, weightNodes);
+ excerpt: null
+};
- return $;
-}
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var WiredExtractor = {
+ domain: 'www.wired.com',
+ title: {
+ selectors: ['h1.post-title']
+ },
-var NORMALIZE_RE = /\s{2,}/g;
+ author: {
+ selectors: ['a[rel="author"]']
+ },
-function normalizeSpaces(text) {
- return text.replace(NORMALIZE_RE, ' ').trim();
-}
+ content: {
+ selectors: ['article.content'],
-// Given a node type to search for, and a list of regular expressions,
-// look to see if this extraction can be found in the URL. Expects
-// that each expression in r_list will return group(1) as the proper
-// string to be cleaned.
-// Only used for date_published currently.
-function extractFromUrl(url, regexList) {
- var matchRe = regexList.find(function (re) {
- return re.test(url);
- });
- if (matchRe) {
- return matchRe.exec(url)[1];
- }
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- return null;
-}
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['.visually-hidden']
+ },
-// An expression that looks to try to find the page digit within a URL, if
-// it exists.
-// Matches:
-// page=1
-// pg=1
-// p=1
-// paging=12
-// pag=7
-// pagination/1
-// paging/88
-// pa/83
-// p/11
-//
-// Does not match:
-// pg=102
-// page:2
-var PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');
+ date_published: {
+ selectors: [['meta[itemprop="datePublished"]', 'value']]
+ },
-var HAS_ALPHA_RE = /[a-z]/i;
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
-var IS_ALPHA_RE = /^[a-z]+$/i;
-var IS_DIGIT_RE = /^[0-9]+$/i;
+ dek: {
+ selectors: [['meta[name="og:description"]', 'value']]
+ },
-function pageNumFromUrl(url) {
- var matches = url.match(PAGE_IN_HREF_RE);
- if (!matches) return null;
+ next_page_url: null,
- var pageNum = parseInt(matches[6], 10);
+ excerpt: null
+};
- // Return pageNum < 100, otherwise
- // return null
- return pageNum < 100 ? pageNum : null;
-}
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var MSNExtractor = {
+ domain: 'www.msn.com',
+ title: {
+ selectors: ['h1']
+ },
-function removeAnchor(url) {
- return url.split('#')[0].replace(/\/$/, '');
-}
+ author: {
+ selectors: ['span.authorname-txt']
+ },
-function isGoodSegment(segment, index, firstSegmentHasLetters) {
- var goodSegment = true;
+ content: {
+ selectors: ['div.richtext'],
- // If this is purely a number, and it's the first or second
- // url_segment, it's probably a page number. Remove it.
- if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {
- goodSegment = true;
- }
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- // If this is the first url_segment and it's just "index",
- // remove it
- if (index === 0 && segment.toLowerCase() === 'index') {
- goodSegment = false;
- }
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['span.caption']
+ },
- // If our first or second url_segment is smaller than 3 characters,
- // and the first url_segment had no alphas, remove it.
- if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {
- goodSegment = false;
- }
+ date_published: {
+ selectors: ['span.time']
+ },
- return goodSegment;
-}
+ lead_image_url: {
+ selectors: []
+ },
-// Take a URL, and return the article base of said URL. That is, no
-// pagination data exists in it. Useful for comparing to other links
-// that might have pagination data within them.
-function articleBaseUrl(url, parsed) {
- var parsedUrl = parsed || URL.parse(url);
- var protocol = parsedUrl.protocol,
- host = parsedUrl.host,
- path = parsedUrl.path;
+ dek: {
+ selectors: [['meta[name="description"]', 'value']]
+ },
+ next_page_url: null,
- var firstSegmentHasLetters = false;
- var cleanedSegments = path.split('/').reverse().reduce(function (acc, rawSegment, index) {
- var segment = rawSegment;
+ excerpt: null
+};
- // Split off and save anything that looks like a file type.
- if (segment.includes('.')) {
- var _segment$split = segment.split('.'),
- _segment$split2 = _slicedToArray(_segment$split, 2),
- possibleSegment = _segment$split2[0],
- fileExt = _segment$split2[1];
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var YahooExtractor = {
+ domain: 'www.yahoo.com',
+ title: {
+ selectors: ['header.canvas-header']
+ },
- if (IS_ALPHA_RE.test(fileExt)) {
- segment = possibleSegment;
- }
- }
+ author: {
+ selectors: ['span.provider-name']
+ },
- // If our first or second segment has anything looking like a page
- // number, remove it.
- if (PAGE_IN_HREF_RE.test(segment) && index < 2) {
- segment = segment.replace(PAGE_IN_HREF_RE, '');
- }
+ content: {
+ selectors: [
+ // enter content selectors
+ '.content-canvas'],
- // If we're on the first segment, check to see if we have any
- // characters in it. The first segment is actually the last bit of
- // the URL, and this will be helpful to determine if we're on a URL
- // segment that looks like "/2/" for example.
- if (index === 0) {
- firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);
- }
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
+
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['.figure-caption']
+ },
+
+ date_published: {
+ selectors: [['time.date[datetime]', 'datetime']]
+ },
- // If it's not marked for deletion, push it to cleaned_segments.
- if (isGoodSegment(segment, index, firstSegmentHasLetters)) {
- acc.push(segment);
- }
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- return acc;
- }, []);
+ dek: {
+ selectors: [['meta[name="og:description"]', 'value']]
+ },
- return protocol + '//' + host + cleanedSegments.reverse().join('/');
-}
+ next_page_url: null,
-// Given a string, return True if it appears to have an ending sentence
-// within it, false otherwise.
-var SENTENCE_END_RE = new RegExp('.( |$)');
-function hasSentenceEnd(text) {
- return SENTENCE_END_RE.test(text);
-}
+ excerpt: null
+};
-function excerptContent(content) {
- var words = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 10;
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+// Extractor configuration for www.buzzfeed.com. Each field lists CSS
+// selectors in priority order; an entry written as [selector, attribute]
+// presumably reads that attribute instead of the element text (the code
+// that consumes these tables is not part of this hunk -- confirm there).
+var BuzzfeedExtractor = {
+ domain: 'www.buzzfeed.com',
+ title: {
+ selectors: ['h1[id="post-title"]']
+ },
- return content.trim().split(/\s+/).slice(0, words).join(' ');
-}
+ author: {
+ // The fallback must be a class selector: a bare `byline__author`
+ // is a type selector and would only match a <byline__author> element.
+ selectors: ['a[data-action="user/username"]', '.byline__author']
+ },
-// Now that we have a top_candidate, look through the siblings of
-// it to see if any of them are decently scored. If they are, they
-// may be split parts of the content (Like two divs, a preamble and
-// a body.) Example:
-// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14
-function mergeSiblings($candidate, topScore, $) {
- if (!$candidate.parent().length) {
- return $candidate;
- }
+ content: {
+ selectors: ['#buzz_sub_buzz'],
- var siblingScoreThreshold = Math.max(10, topScore * 0.25);
- var wrappingDiv = $('');
+ defaultCleaner: false,
- $candidate.parent().children().each(function (index, sibling) {
- var $sibling = $(sibling);
- // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(sibling.tagName)) {
- return null;
- }
+ // Selector-to-tag rewrites applied to the selected content:
+ // h2 elements are converted to b.
+ transforms: {
+ h2: 'b'
+ },
- var siblingScore = getScore($sibling);
- if (siblingScore) {
- if ($sibling === $candidate) {
- wrappingDiv.append($sibling);
- } else {
- var contentBonus = 0;
- var density = linkDensity($sibling);
+ // Anything matching these selectors is removed from the
+ // extracted content.
+ clean: ['.instapaper_ignore', '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline', '.share-box']
+ },
- // If sibling has a very low link density,
- // give it a small bonus
- if (density < 0.05) {
- contentBonus += 20;
- }
+ date_published: {
+ selectors: ['.buzz-datetime']
+ },
- // If sibling has a high link density,
- // give it a penalty
- if (density >= 0.5) {
- contentBonus -= 20;
- }
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- // If sibling node has the same class as
- // candidate, give it a bonus
- if ($sibling.attr('class') === $candidate.attr('class')) {
- contentBonus += topScore * 0.2;
- }
+ dek: {
+ selectors: [['meta[name="description"]', 'value']]
+ },
- var newScore = siblingScore + contentBonus;
+ next_page_url: null,
- if (newScore >= siblingScoreThreshold) {
- return wrappingDiv.append($sibling);
- } else if (sibling.tagName === 'p') {
- var siblingContent = $sibling.text();
- var siblingContentLength = textLength(siblingContent);
+ excerpt: null
+};
- if (siblingContentLength > 80 && density < 0.25) {
- return wrappingDiv.append($sibling);
- } else if (siblingContentLength <= 80 && density === 0 && hasSentenceEnd(siblingContent)) {
- return wrappingDiv.append($sibling);
- }
- }
- }
- }
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+// Extractor configuration for fandom.wikia.com. Each field lists CSS
+// selectors in priority order; an entry written as [selector, attribute]
+// presumably reads that attribute instead of the element text (the code
+// that consumes these tables is not part of this hunk -- confirm there).
+var WikiaExtractor = {
+ domain: 'fandom.wikia.com',
+ title: {
+ selectors: ['h1.entry-title']
+ },
- return null;
- });
+ author: {
+ // `.author vcard` asked for a <vcard> element inside .author, which
+ // cannot match; the compound class selector is used instead.
+ // NOTE(review): assumes the byline is marked up as class="author vcard"
+ // -- confirm against a live page; `.fn` remains the fallback.
+ selectors: ['.author.vcard', '.fn']
+ },
- return wrappingDiv;
-}
+ content: {
+ selectors: ['.grid-content', '.entry-content'],
-// After we've calculated scores, loop through all of the possible
-// candidate nodes we found and find the one with the highest score.
-function findTopCandidate$$1($) {
- var $candidate = void 0;
- var topScore = 0;
+ // No selector-to-tag rewrites are needed. Kept as an empty object
+ // (not an array) so it has the same selector -> tag map shape as
+ // the extractors that do define transforms.
+ transforms: {},
- $('[score]').each(function (index, node) {
- // Ignore tags like BR, HR, etc
- if (NON_TOP_CANDIDATE_TAGS_RE$1.test(node.tagName)) {
- return;
- }
+ // Anything matching these selectors is removed from the
+ // extracted content; nothing is listed for this site.
+ clean: []
+ },
- var $node = $(node);
- var score = getScore($node);
+ date_published: {
+ selectors: [['meta[name="article:published_time"]', 'value']]
+ },
- if (score > topScore) {
- topScore = score;
- $candidate = $node;
- }
- });
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- // If we don't have a candidate, return the body
- // or whatever the first element is
- if (!$candidate) {
- return $('body') || $('*').first();
- }
+ dek: {
+ selectors: [['meta[name="og:description"]', 'value']]
+ },
- $candidate = mergeSiblings($candidate, topScore, $);
+ next_page_url: null,
- return $candidate;
-}
+ excerpt: null
+};
-// Scoring
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var LittleThingsExtractor = {
+ domain: 'www.littlethings.com',
+ title: {
+ selectors: ['h1.post-title']
+ },
-function removeUnlessContent($node, $, weight) {
- // Explicitly save entry-content-asset tags, which are
- // noted as valuable in the Publisher guidelines. For now
- // this works everywhere. We may want to consider making
- // this less of a sure-thing later.
- if ($node.hasClass('entry-content-asset')) {
- return;
- }
+ author: {
+ selectors: [['meta[name="author"]', 'value']]
+ },
- var content = normalizeSpaces($node.text());
+ content: {
+ selectors: [
+ // enter content selectors
+ '.mainContentIntro', '.content-wrapper'],
- if (scoreCommas(content) < 10) {
- var pCount = $('p', $node).length;
- var inputCount = $('input', $node).length;
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- // Looks like a form, too many inputs.
- if (inputCount > pCount / 3) {
- $node.remove();
- return;
- }
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
- var contentLength = content.length;
- var imgCount = $('img', $node).length;
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- // Content is too short, and there are no images, so
- // this is probably junk content.
- if (contentLength < 25 && imgCount === 0) {
- $node.remove();
- return;
- }
+ next_page_url: null,
- var density = linkDensity($node);
+ excerpt: null
+};
- // Too high of link density, is probably a menu or
- // something similar.
- // console.log(weight, density, contentLength)
- if (weight < 25 && density > 0.2 && contentLength > 75) {
- $node.remove();
- return;
- }
+// Rename CustomExtractor
+// to fit your publication
+// (e.g., NYTimesExtractor)
+var PoliticoExtractor = {
+ domain: 'www.politico.com',
+ title: {
+ selectors: [
+ // enter title selectors
+ ['meta[name="og:title"]', 'value']]
+ },
+
+ author: {
+ selectors: ['.story-main-content .byline .vcard']
+ },
- // Too high of a link density, despite the score being
- // high.
- if (weight >= 25 && density > 0.5) {
- // Don't remove the node if it's a list and the
- // previous sibling starts with a colon though. That
- // means it's probably content.
- var tagName = $node.get(0).tagName;
- var nodeIsList = tagName === 'ol' || tagName === 'ul';
- if (nodeIsList) {
- var previousNode = $node.prev();
- if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {
- return;
- }
- }
+ content: {
+ selectors: [
+ // enter content selectors
+ '.story-main-content', '.content-group', '.story-core', '.story-text'],
- $node.remove();
- return;
- }
+ // Is there anything in the content you selected that needs transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: [],
- var scriptCount = $('script', $node).length;
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: ['figcaption']
+ },
- // Too many script tags, not enough content.
- if (scriptCount > 0 && contentLength < 150) {
- $node.remove();
- return;
- }
- }
-}
+ date_published: {
+ selectors: [['.story-main-content .timestamp time[datetime]', 'datetime']]
+ },
-// Given an article, clean it of some superfluous content specified by
-// tags. Things like forms, ads, etc.
-//
-// Tags is an array of tag name's to search through. (like div, form,
-// etc)
-//
-// Return this same doc.
-function cleanTags$$1($article, $) {
- $(CLEAN_CONDITIONALLY_TAGS, $article).each(function (index, node) {
- var $node = $(node);
- var weight = getScore($node);
- if (!weight) {
- weight = getOrInitScore$$1($node, $);
- setScore($node, $, weight);
- }
+ lead_image_url: {
+ selectors: [
+ // enter lead_image_url selectors
+ ['meta[name="og:image"]', 'value']]
+ },
- // drop node if its weight is < 0
- if (weight < 0) {
- $node.remove();
- } else {
- // deteremine if node seems like content
- removeUnlessContent($node, $, weight);
- }
- });
+ dek: {
+ selectors: [['meta[name="description"]', 'value']]
+ },
- return $;
-}
+ next_page_url: null,
-function cleanHeaders($article, $) {
- var title = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : '';
+ excerpt: null
+};
- $(HEADER_TAG_LIST, $article).each(function (index, header) {
- var $header = $(header);
- // Remove any headers that appear before all other p tags in the
- // document. This probably means that it was part of the title, a
- // subtitle or something else extraneous like a datestamp or byline,
- // all of which should be handled by other metadata handling.
- if ($($header, $article).prevAll('p').length === 0) {
- return $header.remove();
- }
+var DeadspinExtractor = {
+ domain: 'deadspin.com',
- // Remove any headers that match the title exactly.
- if (normalizeSpaces($(header).text()) === title) {
- return $header.remove();
- }
+ supportedDomains: ['jezebel.com', 'lifehacker.com', 'kotaku.com', 'gizmodo.com', 'jalopnik.com', 'kinja.com'],
- // If this header has a negative weight, it's probably junk.
- // Get rid of it.
- if (getWeight($(header)) < 0) {
- return $header.remove();
- }
+ title: {
+ selectors: ['h1.headline']
+ },
- return $header;
- });
+ author: {
+ selectors: ['.author']
+ },
- return $;
-}
+ content: {
+ selectors: ['.post-content', '.entry-content'],
-// Rewrite the tag name to div if it's a top level node like body or
-// html to avoid later complications with multiple body tags.
+ // Is there anything in the content you selected that needs to be transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ 'iframe.lazyload[data-recommend-id^="youtube://"]': function iframeLazyloadDataRecommendIdYoutube($node) {
+ var youtubeId = $node.attr('id').split('youtube-')[1];
+ $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);
+ }
+ },
-function rewriteTopLevel$$1(article, $) {
- // I'm not using context here because
- // it's problematic when converting the
- // top-level/root node - AP
- $ = convertNodeTo($('html'), $, 'div');
- $ = convertNodeTo($('body'), $, 'div');
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
- return $;
-}
+ date_published: {
+ selectors: [['time.updated[datetime]', 'datetime']]
+ },
-function absolutize($, rootUrl, attr, $content) {
- $('[' + attr + ']', $content).each(function (_, node) {
- var url = node.attribs[attr];
- var absoluteUrl = URL.resolve(rootUrl, url);
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- node.attribs[attr] = absoluteUrl;
- });
-}
+ dek: {
+ selectors: [
+ // enter selectors
+ ]
+ },
-function makeLinksAbsolute($content, $, url) {
- ['href', 'src'].forEach(function (attr) {
- return absolutize($, url, attr, $content);
- });
+ next_page_url: {
+ selectors: [
+ // enter selectors
+ ]
+ },
- return $content;
-}
+ excerpt: {
+ selectors: [
+ // enter selectors
+ ]
+ }
+};
-function textLength(text) {
- return text.trim().replace(/\s+/g, ' ').length;
-}
+// Custom extractor for
+// BroadwayWorld articles
+// (www.broadwayworld.com)
+var BroadwayWorldExtractor = {
+ domain: 'www.broadwayworld.com',
+ title: {
+ selectors: ['h1.article-title']
+ },
-// Determines what percentage of the text
-// in a node is link text
-// Takes a node, returns a float
-function linkDensity($node) {
- var totalTextLength = textLength($node.text());
+ author: {
+ selectors: ['span[itemprop=author]']
+ },
- var linkText = $node.find('a').text();
- var linkLength = textLength(linkText);
+ content: {
+ selectors: ['div[itemprop=articlebody]'],
- if (totalTextLength > 0) {
- return linkLength / totalTextLength;
- } else if (totalTextLength === 0 && linkLength > 0) {
- return 1;
- }
+ // Is there anything in the content you selected that needs to be transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {},
- return 0;
-}
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
-// Given a node type to search for, and a list of meta tag names to
-// search for, find a meta tag associated.
+ date_published: {
+ selectors: [['meta[itemprop=datePublished]', 'value']]
+ },
-function extractFromMeta$$1($, metaNames, cachedNames) {
- var cleanTags$$1 = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- var foundNames = metaNames.filter(function (name) {
- return cachedNames.indexOf(name) !== -1;
- });
+ dek: {
+ selectors: [['meta[name="og:description"]', 'value']]
+ },
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ next_page_url: {
+ selectors: [
+ // enter selectors
+ ]
+ },
- try {
- var _loop = function _loop() {
- var name = _step.value;
+ excerpt: {
+ selectors: [
+ // enter selectors
+ ]
+ }
+};
- var type = 'name';
- var value = 'value';
+// Custom extractor for
+// Apartment Therapy articles
+// (www.apartmenttherapy.com)
+var ApartmentTherapyExtractor = {
+ domain: 'www.apartmenttherapy.com',
+ title: {
+ selectors: ['h1.headline']
+ },
- var nodes = $('meta[' + type + '="' + name + '"]');
+ author: {
+ selectors: ['.PostByline__name']
+ },
- // Get the unique value of every matching node, in case there
- // are two meta tags with the same name and value.
- // Remove empty values.
- var values = nodes.map(function (index, node) {
- return $(node).attr(value);
- }).toArray().filter(function (text) {
- return text !== '';
- });
+ content: {
+ selectors: ['div.post__content'],
- // If we have more than one value for the same name, we have a
- // conflict and can't trust any of them. Skip this name. If we have
- // zero, that means our meta tags had no values. Skip this name
- // also.
- if (values.length === 1) {
- var metaValue = void 0;
- // Meta values that contain HTML should be stripped, as they
- // weren't subject to cleaning previously.
- if (cleanTags$$1) {
- metaValue = stripTags(values[0], $);
- } else {
- metaValue = values[0];
- }
+ // Is there anything in the content you selected that needs to be transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ 'div[data-render-react-id="images/LazyPicture"]': function divDataRenderReactIdImagesLazyPicture($node, $) {
+ var data = JSON.parse($node.attr('data-props'));
+ var src = data.sources[0].src;
- return {
- v: metaValue
- };
+ var $img = $('<img />').attr('src', src);
+ $node.replaceWith($img);
}
- };
+ },
- for (var _iterator = _getIterator(foundNames), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var _ret = _loop();
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
- if ((typeof _ret === 'undefined' ? 'undefined' : _typeof(_ret)) === "object") return _ret.v;
- }
+ date_published: {
+ selectors: [['.PostByline__timestamp[datetime]', 'datetime']]
+ },
- // If nothing is found, return null
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- return null;
-}
+ dek: {
+ selectors: [['meta[name=description]', 'value']]
+ },
-function isGoodNode($node, maxChildren) {
- // If it has a number of children, it's more likely a container
- // element. Skip it.
- if ($node.children().length > maxChildren) {
- return false;
- }
- // If it looks to be within a comment, skip it.
- if (withinComment($node)) {
- return false;
+ next_page_url: {
+ selectors: [
+ // enter selectors
+ ]
+ },
+
+ excerpt: {
+ selectors: [
+ // enter selectors
+ ]
}
+};
- return true;
-}
+var MediumExtractor = {
+ domain: 'medium.com',
-// Given a a list of selectors find content that may
-// be extractable from the document. This is for flat
-// meta-information, like author, title, date published, etc.
-function extractFromSelectors$$1($, selectors) {
- var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
- var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
- var _iteratorNormalCompletion = true;
- var _didIteratorError = false;
- var _iteratorError = undefined;
+ supportedDomains: ['trackchanges.postlight.com'],
- try {
- for (var _iterator = _getIterator(selectors), _step; !(_iteratorNormalCompletion = (_step = _iterator.next()).done); _iteratorNormalCompletion = true) {
- var selector = _step.value;
+ title: {
+ selectors: ['h1']
+ },
- var nodes = $(selector);
+ author: {
+ selectors: [['meta[name="author"]', 'value']]
+ },
- // If we didn't get exactly one of this selector, this may be
- // a list of articles or comments. Skip it.
- if (nodes.length === 1) {
- var $node = $(nodes[0]);
+ content: {
+ selectors: ['.section-content'],
- if (isGoodNode($node, maxChildren)) {
- var content = void 0;
- if (textOnly) {
- content = $node.text();
- } else {
- content = $node.html();
- }
+ // Is there anything in the content you selected that needs to be transformed
+ // before it's consumable content? E.g., unusual lazy loaded images
+ transforms: {
+ // Re-write lazy-loaded youtube videos
+ iframe: function iframe($node) {
+ var ytRe = /https:\/\/i.embed.ly\/.+url=https:\/\/i\.ytimg\.com\/vi\/(\w+)\//;
+ var thumb = decodeURIComponent($node.attr('data-thumbnail'));
- if (content) {
- return content;
- }
+ if (ytRe.test(thumb)) {
+ var _thumb$match = thumb.match(ytRe),
+ _thumb$match2 = _slicedToArray(_thumb$match, 2),
+ _ = _thumb$match2[0],
+ youtubeId = _thumb$match2[1]; // eslint-disable-line
+
+
+ $node.attr('src', 'https://www.youtube.com/embed/' + youtubeId);
+ var $parent = $node.parents('figure');
+ $parent.prepend($node.clone());
+ $node.remove();
}
}
- }
- } catch (err) {
- _didIteratorError = true;
- _iteratorError = err;
- } finally {
- try {
- if (!_iteratorNormalCompletion && _iterator.return) {
- _iterator.return();
- }
- } finally {
- if (_didIteratorError) {
- throw _iteratorError;
- }
- }
- }
+ },
- return null;
-}
+ // Is there anything that is in the result that shouldn't be?
+ // The clean selectors will remove anything that matches from
+ // the result
+ clean: []
+ },
-// strips all tags from a string of text
-function stripTags(text, $) {
- // Wrapping text in html element prevents errors when text
- // has no html
- var cleanText = $('' + text + '').text();
- return cleanText === '' ? text : cleanText;
-}
+ date_published: {
+ selectors: [['time[datetime]', 'datetime']]
+ },
-function withinComment($node) {
- var parents = $node.parents().toArray();
- var commentParent = parents.find(function (parent) {
- var classAndId = parent.attribs.class + ' ' + parent.attribs.id;
- return classAndId.includes('comment');
- });
+ lead_image_url: {
+ selectors: [['meta[name="og:image"]', 'value']]
+ },
- return commentParent !== undefined;
-}
+ dek: {
+ selectors: [
+ // enter selectors
+ ]
+ },
-// Given a node, determine if it's article-like enough to return
-// param: node (a cheerio node)
-// return: boolean
+ next_page_url: {
+ selectors: [
+ // enter selectors
+ ]
+ },
-function nodeIsSufficient($node) {
- return $node.text().trim().length >= 100;
-}
+ excerpt: {
+ selectors: [
+ // enter selectors
+ ]
+ }
+};
-function isWordpress($) {
- return $(IS_WP_SELECTOR).length > 0;
-}
-// DOM manipulation
+
+var CustomExtractors = Object.freeze({
+ BloggerExtractor: BloggerExtractor,
+ NYMagExtractor: NYMagExtractor,
+ WikipediaExtractor: WikipediaExtractor,
+ TwitterExtractor: TwitterExtractor,
+ NYTimesExtractor: NYTimesExtractor,
+ TheAtlanticExtractor: TheAtlanticExtractor,
+ NewYorkerExtractor: NewYorkerExtractor,
+ WiredExtractor: WiredExtractor,
+ MSNExtractor: MSNExtractor,
+ YahooExtractor: YahooExtractor,
+ BuzzfeedExtractor: BuzzfeedExtractor,
+ WikiaExtractor: WikiaExtractor,
+ LittleThingsExtractor: LittleThingsExtractor,
+ PoliticoExtractor: PoliticoExtractor,
+ DeadspinExtractor: DeadspinExtractor,
+ BroadwayWorldExtractor: BroadwayWorldExtractor,
+ ApartmentTherapyExtractor: ApartmentTherapyExtractor,
+ MediumExtractor: MediumExtractor
+});
+
+var Extractors = _Object$keys(CustomExtractors).reduce(function (acc, key) {
+ var extractor = CustomExtractors[key];
+ return _extends({}, acc, mergeSupportedDomains(extractor));
+}, {});
// CLEAN AUTHOR CONSTANTS
var CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
@@ -2747,7 +2816,6 @@ function cleanDatePublished(dateString) {
}
// Clean our article content, returning a new, cleaned node.
-
function extractCleanNode(article, _ref) {
var $ = _ref.$,
_ref$cleanConditional = _ref.cleanConditionally,
@@ -2786,7 +2854,7 @@ function extractCleanNode(article, _ref) {
cleanHeaders(article, $, title);
// Make links absolute
- makeLinksAbsolute(article, $, url);
+ makeLinksAbsolute$$1(article, $, url);
// We used to clean UL's and OL's here, but it was leading to
// too many in-article lists being removed. Consider a better
@@ -2798,7 +2866,7 @@ function extractCleanNode(article, _ref) {
removeEmpty(article, $);
// Remove unnecessary attributes
- cleanAttributes(article, $);
+ cleanAttributes$$1(article, $);
return article;
}
@@ -3418,7 +3486,7 @@ function scoreBySibling($img) {
var $sibling = $img.next();
var sibling = $sibling.get(0);
- if (sibling && sibling.tagName === 'figcaption') {
+ if (sibling && sibling.tagName.toLowerCase() === 'figcaption') {
score += 25;
}
@@ -3475,9 +3543,13 @@ var GenericLeadImageUrlExtractor = {
extract: function extract(_ref) {
var $ = _ref.$,
content = _ref.content,
- metaCache = _ref.metaCache;
+ metaCache = _ref.metaCache,
+ html = _ref.html;
var cleanUrl = void 0;
+ if (!$.browser && $('head').length === 0) {
+ $('*').first().prepend(html);
+ }
// Check to see if we have a matching meta tag that we can make use of.
// Moving this higher because common practice is now to use large
@@ -3494,7 +3566,8 @@ var GenericLeadImageUrlExtractor = {
// Next, try to find the "best" image via the content.
// We'd rather not have to fetch each image and check dimensions,
// so try to do some analysis and determine them instead.
- var imgs = $('img', content).toArray();
+ var $content = $(content);
+ var imgs = $('img', $content).toArray();
var imgScores = {};
imgs.forEach(function (img, index) {
@@ -3992,7 +4065,12 @@ function scoreLinks(_ref) {
// Remove any anchor data since we don't do a good job
// standardizing URLs (it's hard), we're going to do
// some checking with and without a trailing slash
- var href = removeAnchor(link.attribs.href);
+ var attrs = getAttrs(link);
+
+ // if href is undefined, return
+ if (!attrs.href) return possiblePages;
+
+ var href = removeAnchor(attrs.href);
var $link = $(link);
var linkText = $link.text();
@@ -4149,8 +4227,9 @@ var GenericWordCountExtractor = {
var content = _ref.content;
var $ = cheerio.load(content);
+ var $content = $('div').first();
- var text = normalizeSpaces($('div').first().text());
+ var text = normalizeSpaces($content.text());
return text.split(/\s/).length;
}
};
@@ -4174,12 +4253,13 @@ var GenericExtractor = {
},
extract: function extract(options) {
- var html = options.html;
+ var html = options.html,
+ $ = options.$;
- if (html) {
- var $ = cheerio.load(html);
- options.$ = $;
+ if (html && !$) {
+ var loaded = cheerio.load(html);
+ options.$ = loaded;
}
var title = this.title(options);
@@ -4248,7 +4328,7 @@ function transformElements($content, $, _ref2) {
// If value is a string, convert directly
if (typeof value === 'string') {
$matches.each(function (index, node) {
- convertNodeTo($(node), $, transforms[key]);
+ convertNodeTo$$1($(node), $, transforms[key]);
});
} else if (typeof value === 'function') {
// If value is function, apply function to node
@@ -4256,7 +4336,7 @@ function transformElements($content, $, _ref2) {
var result = value($(node), $);
// If function returns a string, convert node to that value
if (typeof result === 'string') {
- convertNodeTo($(node), $, result);
+ convertNodeTo$$1($(node), $, result);
}
});
}
@@ -4473,7 +4553,7 @@ var collectAllPages = (function () {
previousUrls.push(next_page_url);
result = _extends({}, result, {
- content: '\n ' + result.content + '\n \n
' + nextPageResult.content
});
next_page_url = nextPageResult.next_page_url;
@@ -4516,52 +4596,73 @@ var Mercury = {
switch (_context.prev = _context.next) {
case 0:
_opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === undefined ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === undefined ? true : _opts$fallback;
+
+ // if no url was passed and this is the browser version,
+ // set url to window.location.href and load the html
+ // from the current page
+
+ if (!url && cheerio.browser) {
+ url = window.location.href; // eslint-disable-line no-undef
+ html = html || cheerio.html();
+ }
+
parsedUrl = URL.parse(url);
if (validateUrl(parsedUrl)) {
- _context.next = 4;
+ _context.next = 5;
break;
}
return _context.abrupt('return', Errors.badUrl);
- case 4:
+ case 5:
Extractor = getExtractor(url, parsedUrl);
// console.log(`Using extractor for ${Extractor.domain}`);
- _context.next = 7;
+ _context.next = 8;
return Resource.create(url, html, parsedUrl);
- case 7:
+ case 8:
$ = _context.sent;
- if (!$.error) {
- _context.next = 10;
+ if (!$.failed) {
+ _context.next = 11;
break;
}
return _context.abrupt('return', $);
- case 10:
+ case 11:
- html = $.html();
+ // if html still has not been set (i.e., url passed to Mercury.parse),
+ // set html from the response of Resource.create
+ if (!html) {
+ html = $.html();
+ }
// Cached value of every meta name in our document.
// Used when extracting title/author/date_published/dek
metaCache = $('meta').map(function (_, node) {
return $(node).attr('name');
}).toArray();
- result = RootExtractor.extract(Extractor, { url: url, html: html, $: $, metaCache: metaCache, parsedUrl: parsedUrl, fallback: fallback });
+ result = RootExtractor.extract(Extractor, {
+ url: url,
+ html: html,
+ $: $,
+ metaCache: metaCache,
+ parsedUrl: parsedUrl,
+ fallback: fallback
+ });
_result = result, title = _result.title, next_page_url = _result.next_page_url;
// Fetch more pages if next_page_url found
if (!(fetchAllPages && next_page_url)) {
- _context.next = 20;
+ _context.next = 21;
break;
}
- _context.next = 17;
+ _context.next = 18;
return collectAllPages({
Extractor: Extractor,
next_page_url: next_page_url,
@@ -4573,21 +4674,21 @@ var Mercury = {
url: url
});
- case 17:
+ case 18:
result = _context.sent;
- _context.next = 21;
+ _context.next = 22;
break;
- case 20:
+ case 21:
result = _extends({}, result, {
total_pages: 1,
rendered_pages: 1
});
- case 21:
+ case 22:
return _context.abrupt('return', result);
- case 22:
+ case 23:
case 'end':
return _context.stop();
}
@@ -4597,6 +4698,8 @@ var Mercury = {
},
+ browser: !!cheerio.browser,
+
// A convenience method for getting a resource
// to work with, e.g., for custom extractor generator
fetchResource: function fetchResource(url) {
diff --git a/dist/mercury.js.map b/dist/mercury.js.map
index 5db2c096..e8cc4be5 100644
--- a/dist/mercury.js.map
+++ b/dist/mercury.js.map
@@ -1 +1 @@
-{"version":3,"file":null,"sources":["../src/utils/range.js","../src/utils/validate-url.js","../src/utils/errors.js","../src/resource/utils/constants.js","../src/resource/utils/fetch-resource.js","../src/resource/utils/dom/normalize-meta-tags.js","../src/resource/utils/dom/constants.js","../src/resource/utils/dom/convert-lazy-loaded-images.js","../src/resource/utils/dom/clean.js","../src/resource/index.js","../src/utils/merge-supported-domains.js","../src/extractors/custom/blogspot.com/index.js","../src/extractors/custom/nymag.com/index.js","../src/extractors/custom/wikipedia.org/index.js","../src/extractors/custom/twitter.com/index.js","../src/extractors/custom/www.nytimes.com/index.js","../src/extractors/custom/www.theatlantic.com/index.js","../src/extractors/custom/www.newyorker.com/index.js","../src/extractors/custom/www.wired.com/index.js","../src/extractors/custom/www.msn.com/index.js","../src/extractors/custom/www.yahoo.com/index.js","../src/extractors/custom/www.buzzfeed.com/index.js","../src/extractors/custom/fandom.wikia.com/index.js","../src/extractors/custom/www.littlethings.com/index.js","../src/extractors/custom/www.politico.com/index.js","../src/extractors/custom/deadspin.com/index.js","../src/extractors/custom/www.broadwayworld.com/index.js","../src/extractors/custom/www.apartmenttherapy.com/index.js","../src/extractors/custom/medium.com/index.js","../src/extractors/all.js","../src/utils/dom/constants.js","../src/utils/dom/strip-unlikely-candidates.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/mark-to-keep.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-h-ones.js","../src/utils/dom/clean-attributes.js","../src/utils/dom/remove-empty.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic
/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/page-num-from-url.js","../src/utils/text/remove-anchor.js","../src/utils/text/article-base-url.js","../src/utils/text/has-sentence-end.js","../src/utils/text/excerpt-content.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/find-top-candidate.js","../src/extractors/generic/content/scoring/index.js","../src/utils/dom/clean-tags.js","../src/utils/dom/clean-headers.js","../src/utils/dom/rewrite-top-level.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-meta.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/is-wordpress.js","../src/utils/dom/index.js","../src/cleaners/constants.js","../src/cleaners/author.js","../src/cleaners/lead-image-url.js","../src/cleaners/dek.js","../src/cleaners/date-published.js","../src/cleaners/content.js","../src/cleaners/title.js","../src/cleaners/resolve-split-title.js","../src/cleaners/index.js","../src/extractors/generic/content/extract-best-node.js","../src/extractors/generic/content/extractor.js","../src/extractors/generic/title/constants.js","../src/extractors/generic/title/extractor.js","../src/ext
ractors/generic/author/constants.js","../src/extractors/generic/author/extractor.js","../src/extractors/generic/date-published/constants.js","../src/extractors/generic/date-published/extractor.js","../src/extractors/generic/dek/extractor.js","../src/extractors/generic/lead-image-url/constants.js","../src/extractors/generic/lead-image-url/score-image.js","../src/extractors/generic/lead-image-url/extractor.js","../src/extractors/generic/next-page-url/scoring/utils/score-similarity.js","../src/extractors/generic/next-page-url/scoring/utils/score-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-page-in-link.js","../src/extractors/generic/next-page-url/scoring/constants.js","../src/extractors/generic/next-page-url/scoring/utils/score-extraneous-links.js","../src/extractors/generic/next-page-url/scoring/utils/score-by-parents.js","../src/extractors/generic/next-page-url/scoring/utils/score-prev-link.js","../src/extractors/generic/next-page-url/scoring/utils/should-score.js","../src/extractors/generic/next-page-url/scoring/utils/score-base-url.js","../src/extractors/generic/next-page-url/scoring/utils/score-next-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-cap-links.js","../src/extractors/generic/next-page-url/scoring/score-links.js","../src/extractors/generic/next-page-url/extractor.js","../src/extractors/generic/url/constants.js","../src/extractors/generic/url/extractor.js","../src/extractors/generic/excerpt/constants.js","../src/extractors/generic/excerpt/extractor.js","../src/extractors/generic/word-count/extractor.js","../src/extractors/generic/index.js","../src/extractors/get-extractor.js","../src/extractors/root-extractor.js","../src/extractors/collect-all-pages.js","../src/mercury.js"],"sourcesContent":["export default function* range(start = 1, end = 1) {\n while (start <= end) {\n yield start += 1;\n }\n}\n","// extremely simple url validation as a first step\nexport default function validateUrl({ hostname 
}) {\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n","const Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.',\n },\n};\n\nexport default Errors;\n","export const REQUEST_HEADERS = {\n 'User-Agent': 'Readability - http://readability.com/about/',\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nexport const FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nconst BAD_CONTENT_TYPES = [\n 'audio/mpeg',\n 'image/gif',\n 'image/jpeg',\n 'image/jpg',\n];\n\nexport const BAD_CONTENT_TYPES_RE = new RegExp(`^(${BAD_CONTENT_TYPES.join('|')})$`, 'i');\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. Defaults to 5 MB.\nexport const MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\nexport const PROXY_DOMAINS = false;\nexport const REQUESTS_PROXIES = {\n http: 'http://38.98.105.139:33333',\n https: 'http://38.98.105.139:33333',\n};\n\nexport const DOMAINS_TO_PROXY = [\n 'nih.gov',\n 'gutenberg.org',\n];\n","import URL from 'url';\nimport request from 'request';\nimport { Errors } from 'utils';\n\nimport {\n REQUEST_HEADERS,\n FETCH_TIMEOUT,\n BAD_CONTENT_TYPES_RE,\n MAX_CONTENT_LENGTH,\n} from './constants';\n\nfunction get(options) {\n return new Promise((resolve, reject) => {\n request(options, (err, response, body) => {\n if (err) {\n reject(err);\n } else {\n resolve({ body, response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. 
Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nexport function validateResponse(response, parseNon2xx = false) {\n // Check if we got a valid status code\n // This isn't great, but I'm requiring a statusMessage to be set\n // before short circuiting b/c nock doesn't set it in tests\n // statusMessage only not set in nock response, in which case\n // I check statusCode, which is currently only 200 for OK responses\n // in tests\n if (\n (response.statusMessage && response.statusMessage !== 'OK') ||\n response.statusCode !== 200\n ) {\n if (!response.statusCode) {\n throw new Error(\n `Unable to fetch content. Original exception was ${response.error}`\n );\n } else if (!parseNon2xx) {\n throw new Error(\n `Resource returned a response status code of ${response.statusCode} and resource was instructed to reject non-2xx level status codes.`\n );\n }\n }\n\n const {\n 'content-type': contentType,\n 'content-length': contentLength,\n } = response.headers;\n\n // Check that the content is not in BAD_CONTENT_TYPES\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error(\n `Content-type for this resource was ${contentType} and is not allowed.`\n );\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error(\n `Content for this resource was too large. Maximum content length is ${MAX_CONTENT_LENGTH}.`\n );\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\nexport function baseDomain({ host }) {\n return host.split('.').slice(-2).join('.');\n}\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nexport default async function fetchResource(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n\n const options = {\n url: parsedUrl,\n headers: { ...REQUEST_HEADERS },\n timeout: FETCH_TIMEOUT,\n // Don't set encoding; fixes issues\n // w/gzipped responses\n encoding: null,\n // Accept cookies\n jar: true,\n // Accept and decode gzip\n gzip: true,\n // Follow any redirect\n followAllRedirects: true,\n };\n\n const { response, body } = await get(options);\n\n try {\n validateResponse(response);\n return {\n body,\n response,\n };\n } catch (e) {\n return Errors.badUrl;\n }\n}\n","function convertMetaProp($, from, to) {\n $(`meta[${from}]`).each((_, node) => {\n const $node = $(node);\n\n const value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. See, e.g., og or twitter meta tags.\n\nexport default function normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n","export const IS_LINK = new RegExp('https?://', 'i');\nexport const IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nexport const TAGS_TO_REMOVE = [\n 'script',\n 'style',\n 'form',\n].join(',');\n","import {\n IS_LINK,\n IS_IMAGE,\n} from './constants';\n\n// Convert all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. 
We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nexport default function convertLazyLoadedImages($) {\n $('img').each((_, img) => {\n Reflect.ownKeys(img.attribs).forEach((attr) => {\n const value = img.attribs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) &&\n IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n","import { TAGS_TO_REMOVE } from './constants';\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root().find('*')\n .contents()\n .filter(isComment)\n .remove();\n\n return $;\n}\n\nexport default function clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n","import cheerio from 'cheerio';\n\nimport { fetchResource } from './utils';\nimport {\n normalizeMetaTags,\n convertLazyLoadedImages,\n clean,\n} from './utils/dom';\n\nconst Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. 
Expects a\n // string.\n async create(url, preparedResponse, parsedUrl) {\n let result;\n\n if (preparedResponse) {\n const validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500,\n },\n };\n\n result = { body: preparedResponse, response: validResponse };\n } else {\n result = await fetchResource(url, parsedUrl);\n }\n\n if (result.error) {\n return result;\n }\n\n return this.generateDoc(result);\n },\n\n generateDoc({ body: content, response }) {\n const { 'content-type': contentType } = response.headers;\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n if (!contentType.includes('html') &&\n !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n let $ = cheerio.load(content, { normalizeWhitespace: true });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n },\n};\n\nexport default Resource;\n","const merge = (extractor, domains) => (\n domains.reduce((acc, domain) => {\n acc[domain] = extractor;\n return acc;\n }, {})\n);\n\nexport default function mergeSupportedDomains(extractor) {\n return extractor.supportedDomains ?\n merge(extractor, [extractor.domain, ...extractor.supportedDomains])\n :\n merge(extractor, [extractor.domain]);\n}\n","export const BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: [\n '.post-content noscript',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n ],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div',\n },\n },\n\n author: {\n selectors: [\n 
'.post-author-name',\n ],\n },\n\n title: {\n selectors: [\n '.post h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n 'span.publishdate',\n ],\n },\n};\n","export const NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: [\n 'div.article-content',\n 'section.body',\n 'article.article',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n '.ad',\n '.single-related-story',\n ],\n\n // Object of tranformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: ($node) => {\n const $children = $node.children();\n if ($children.length === 1 && $children.get(0).tagName === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n },\n\n title: {\n selectors: [\n 'h1.lede-feature-title',\n 'h1.headline-primary',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n '.by-authors',\n '.lede-feature-author',\n ],\n },\n\n dek: {\n selectors: [\n '.lede-feature-teaser',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.article-timestamp[datetime]', 'datetime'],\n 'time.article-timestamp',\n ],\n },\n};\n","export const WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: [\n '#mw-content-text',\n ],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': ($node) => {\n const $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 'figure',\n },\n\n // Selectors to remove from the extracted content\n clean: [\n 
'.mw-editsection',\n 'figure tr, figure td, figure tbody',\n '#toc',\n '.navbox',\n ],\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: [\n 'h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n '#footer-info-lastmod',\n ],\n },\n\n};\n","export const TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': ($node, $) => {\n const tweets = $node.find('.tweet');\n const $tweetContainer = $('');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with s, which\n // renders as a strikethrough\n s: 'span',\n },\n\n selectors: [\n '.permalink[role=main]',\n ],\n\n defaultCleaner: false,\n\n clean: [\n '.stream-item-footer',\n 'button',\n '.tweet-details-fixer',\n ],\n },\n\n author: {\n selectors: [\n '.tweet.permalink-tweet .username',\n ],\n },\n\n date_published: {\n selectors: [\n ['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms'],\n // '.tweet.permalink-tweet .metadata',\n ],\n },\n\n};\n","export const NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: [\n '.g-headline',\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n '.g-byline',\n '.byline',\n ],\n },\n\n content: {\n selectors: [\n 'div.g-blocks',\n 'article#story',\n ],\n\n defaultCleaner: false,\n\n transforms: {\n 'img.g-lazy': ($node) => {\n let src = $node.attr('src');\n // const widths = $node.attr('data-widths')\n // .slice(1)\n // .slice(0, -1)\n // .split(',');\n // if (widths.length) {\n // width = widths.slice(-1);\n // } else {\n // width = '900';\n // }\n const width = 640;\n\n src = src.replace('{{size}}', width);\n $node.attr('src', src);\n },\n },\n\n clean: [\n '.ad',\n 
'header#story-header',\n '.story-body-1 .lede.video',\n '.visually-hidden',\n '#newsletter-promo',\n '.promo',\n '.comments-button',\n '.hidden',\n ],\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\nexport const TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: [\n 'h1.hed',\n ],\n },\n\n author: {\n selectors: [\n 'article#article .article-cover-extra .metadata .byline a',\n ],\n },\n\n content: {\n selectors: [\n '.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n '.contributors',\n ],\n },\n\n content: {\n selectors: [\n 'div#articleBody',\n 'div.articleBody',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[rel=\"author\"]',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'article.content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.visually-hidden',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: [\n 'h1',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.authorname-txt',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'div.richtext',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'span.caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n 'span.time',\n ],\n },\n\n lead_image_url: {\n selectors: [\n\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: [\n 'header.canvas-header',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.provider-name',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.figure-caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time.date[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n // enter dek selectors\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: [\n 'h1[id=\"post-title\"]',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[data-action=\"user/username\"]', 'byline__author',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '#buzz_sub_buzz',\n // enter content selectors\n ],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n h2: 'b',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.instapaper_ignore',\n '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline',\n '.share-box',\n ],\n },\n\n date_published: {\n selectors: [\n '.buzz-datetime',\n // enter author selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: [\n 'h1.entry-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n '.author vcard', '.fn',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '.grid-content',\n '.entry-content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro',\n '.content-wrapper',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n '.story-main-content .byline .vcard',\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content',\n '.content-group', '.story-core',\n '.story-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'figcaption',\n ],\n },\n\n date_published: {\n selectors: [\n ['.story-main-content .timestamp time[datetime]', 'datetime'],\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value'],\n\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","export const DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: [\n 'jezebel.com',\n 'lifehacker.com',\n 'kotaku.com',\n 'gizmodo.com',\n 'jalopnik.com',\n 'kinja.com',\n ],\n\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n content: {\n selectors: [\n '.post-content',\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs 
transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': ($node) => {\n const youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n ],\n },\n\n date_published: {\n selectors: [\n ['time.updated[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: [\n 'h1.article-title',\n ],\n },\n\n author: {\n selectors: [\n 'span[itemprop=author]',\n ],\n },\n\n content: {\n selectors: [\n 'div[itemprop=articlebody]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=datePublished]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.PostByline__name',\n ],\n },\n\n content: {\n selectors: [\n 'div.post__content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': ($node, $) => {\n const data = JSON.parse($node.attr('data-props'));\n const { src } = data.sources[0];\n const $img = $('').attr('src', src);\n $node.replaceWith($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['.PostByline__timestamp[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=description]', 'value'],\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const MediumExtractor = {\n domain: 'medium.com',\n\n supportedDomains: [\n 'trackchanges.postlight.com',\n ],\n\n title: {\n selectors: [\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.section-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: ($node) => {\n const ytRe =\n /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n const thumb = decodeURIComponent($node.attr('data-thumbnail'));\n\n if (ytRe.test(thumb)) {\n const [_, youtubeId] = thumb.match(ytRe) // eslint-disable-line\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n const $parent = $node.parents('figure');\n $parent.prepend($node.clone());\n $node.remove();\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","import mergeSupportedDomains from 'utils/merge-supported-domains';\nimport * as CustomExtractors from './custom/index';\n\nexport default Object.keys(CustomExtractors).reduce((acc, key) => {\n const extractor = CustomExtractors[key];\n return {\n ...acc,\n ...mergeSupportedDomains(extractor),\n };\n}, {});\n","// Spacer images to be removed\nexport const SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nexport const KEEP_CLASS = 'mercury-parser-keep';\n\nexport const KEEP_SELECTORS = [\n 'iframe[src^=\"https://www.youtube.com\"]',\n 'iframe[src^=\"http://www.youtube.com\"]',\n 'iframe[src^=\"https://player.vimeo\"]',\n 'iframe[src^=\"http://player.vimeo\"]',\n];\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 
'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nexport const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(tag => `${tag}:empty`).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting 
content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const 
PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching 
regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. 
Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n CANDIDATES_WHITELIST,\n CANDIDATES_BLACKLIST,\n} from './constants';\n\nexport default function stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may 
be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each((index, node) => {\n const $node = $(node);\n const classes = $node.attr('class');\n const id = $node.attr('id');\n if (!id && !classes) return;\n\n const classAndId = `${classes || ''} ${id || ''}`;\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. - AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const nextElement = $(element).next().get(0);\n\n if (nextElement && nextElement.tagName === 'br') {\n collapsing = true;\n $(element).remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. 
does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. 
Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","export default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const { attribs } = $node.get(0);\n const attribString = Reflect.ownKeys(attribs)\n .map(key => `${key}=${attribs[key]}`)\n .join(' ');\n\n $node.replaceWith(`<${tag} ${attribString}>${$node.contents()}${tag}>`);\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return $;\n}\n","import URL from 'url';\n\nimport {\n KEEP_SELECTORS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function markToKeep(article, $, url, tags = []) {\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n const { protocol, hostname } = URL.parse(url);\n tags = [...tags, `iframe[src^=\"${protocol}//${hostname}\"]`];\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return 
$;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not(`.${KEEP_CLASS}`).remove();\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there's less than 3 of them (<3),\n// strip them. Otherwise, turn 'em into H2s.\nexport default function cleanHOnes(article, $) {\n const $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each((index, node) => $(node).remove());\n } else {\n $hOnes.each((index, node) => {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n","import { WHITELIST_ATTRS_RE } from './constants';\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each((index, node) => {\n node.attribs = Reflect.ownKeys(node.attribs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: node.attribs[attr] };\n }\n\n return acc;\n }, {});\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ?\n $article.parent() : $article\n );\n}\n","export default function removeEmpty($article, $) {\n $article.find('p').each((index, p) => {\n const $p = $(p);\n if ($p.find('iframe, img').length === 0 && 
$p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". 
It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. 
Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) 
{\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","const NORMALIZE_RE = /\\s{2,}/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n","import { PAGE_IN_HREF_RE } from './constants';\n\nexport default function pageNumFromUrl(url) {\n const matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n const pageNum = 
parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? pageNum : null;\n}\n","export default function removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n","import URL from 'url';\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n let goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. 
Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path.split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n","export default function excerptContent(content, words = 10) {\n return content.trim()\n .split(/\\s+/)\n .slice(0, words)\n .join(' ');\n}\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from 
'./index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling === $candidate) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n return wrappingDiv;\n}\n","import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { 
getScore } from './index';\nimport mergeSiblings from './merge-siblings';\n\n// After we've calculated scores, loop through all of the possible\n// candidate nodes we found and find the one with the highest score.\nexport default function findTopCandidate($) {\n let $candidate;\n let topScore = 0;\n\n $('[score]').each((index, node) => {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {\n return;\n }\n\n const $node = $(node);\n const score = getScore($node);\n\n if (score > topScore) {\n topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n","// Scoring\nexport { default as getWeight } from './get-weight';\nexport { default as getScore } from './get-score';\nexport { default as scoreCommas } from './score-commas';\nexport { default as scoreLength } from './score-length';\nexport { default as scoreParagraph } from './score-paragraph';\nexport { default as setScore } from './set-score';\nexport { default as addScore } from './add-score';\nexport { default as addToParent } from './add-to-parent';\nexport { default as getOrInitScore } from './get-or-init-score';\nexport { default as scoreNode } from './score-node';\nexport { default as scoreContent } from './score-content';\nexport { default as findTopCandidate } from './find-top-candidate';\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. 
We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName;\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","import { getWeight } from 'extractors/generic/content/scoring';\n\nimport { HEADER_TAG_LIST } from './constants';\nimport { normalizeSpaces } from '../text';\n\nexport default function cleanHeaders($article, $, title = '') {\n $(HEADER_TAG_LIST, $article).each((index, header) => {\n const $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\nexport default function rewriteTopLevel(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo($('html'), $, 'div');\n $ = convertNodeTo($('body'), $, 'div');\n\n return $;\n}\n","import URL from 
'url';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const url = node.attribs[attr];\n const absoluteUrl = URL.resolve(rootUrl, url);\n\n node.attribs[attr] = absoluteUrl;\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { stripTags } from 'utils/dom';\n\n// Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\nexport default function extractFromMeta(\n $,\n metaNames,\n cachedNames,\n cleanTags = true\n) {\n const foundNames = metaNames.filter(name => cachedNames.indexOf(name) !== -1);\n\n for (const name of foundNames) {\n const type = 'name';\n const value = 'value';\n\n const nodes = $(`meta[${type}=\"${name}\"]`);\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n const values =\n nodes.map((index, node) => $(node).attr(value))\n .toArray()\n .filter(text => text !== '');\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. 
Skip this name\n // also.\n if (values.length === 1) {\n let metaValue;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags) {\n metaValue = stripTags(values[0], $);\n } else {\n metaValue = values[0];\n }\n\n return metaValue;\n }\n }\n\n // If nothing is found, return null\n return null;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`${text}`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","export default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const classAndId = `${parent.attribs.class} ${parent.attribs.id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","import { IS_WP_SELECTOR } from './constants';\n\nexport default function isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n","// DOM manipulation\nexport { default as stripUnlikelyCandidates } from './strip-unlikely-candidates';\nexport { default as brsToPs } from './brs-to-ps';\nexport { default as paragraphize } from './paragraphize';\nexport { default as convertToParagraphs } from './convert-to-paragraphs';\nexport { default as convertNodeTo } from './convert-node-to';\nexport { default as cleanImages } from './clean-images';\nexport { default as markToKeep } from './mark-to-keep';\nexport { default as stripJunkTags } from './strip-junk-tags';\nexport { default as cleanHOnes } from './clean-h-ones';\nexport { default as cleanAttributes } from './clean-attributes';\nexport { default as removeEmpty } from './remove-empty';\nexport { default as cleanTags } from './clean-tags';\nexport { default as cleanHeaders } from './clean-headers';\nexport { default as rewriteTopLevel } from './rewrite-top-level';\nexport { default as makeLinksAbsolute } from './make-links-absolute';\nexport { textLength, linkDensity } from './link-density';\nexport { default as extractFromMeta } from './extract-from-meta';\nexport { default as extractFromSelectors } from './extract-from-selectors';\nexport { default as stripTags } from './strip-tags';\nexport { default as withinComment } from './within-comment';\nexport { default as 
nodeIsSufficient } from './node-is-sufficient';\nexport { default as isWordpress } from './is-wordpress';\n","// CLEAN AUTHOR CONSTANTS\nexport const CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n // author = re.sub(r'^\\s*(posted |written )?by\\s*:?\\s*(.*)(?i)',\n\n// CLEAN DEK CONSTANTS\nexport const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistenty enough. Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\nexport const DEK_META_TAGS = [\n];\n\n// An ordered list of Selectors to find likely article deks. From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\nexport const DEK_SELECTORS = [\n '.entry-summary',\n];\n\n// CLEAN DATE PUBLISHED CONSTANTS\nexport const MS_DATE_STRING = /^\\d{13}$/i;\nexport const SEC_DATE_STRING = /^\\d{10}$/i;\nexport const CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nexport const TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nexport const TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nconst months = [\n 'jan',\n 'feb',\n 'mar',\n 'apr',\n 'may',\n 'jun',\n 'jul',\n 'aug',\n 'sep',\n 'oct',\n 'nov',\n 'dec',\n];\nconst allMonths = months.join('|');\nconst timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nconst timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nexport const SPLIT_DATE_STRING =\n new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nexport const 
TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nexport const DOMAIN_ENDINGS_RE =\n new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n","import { CLEAN_AUTHOR_RE } from './constants';\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nexport default function cleanAuthor(author) {\n return author.replace(CLEAN_AUTHOR_RE, '$2').trim();\n}\n","import validUrl from 'valid-url';\n\nexport default function clean(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return null;\n}\n","import { stripTags } from 'utils/dom';\nimport { excerptContent } from 'utils/text';\n\nimport { TEXT_LINK_RE } from './constants';\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nexport default function cleanDek(dek, { $, excerpt }) {\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10)) return null;\n\n const dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. 
If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return dekText.trim();\n}\n","import moment from 'moment';\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nimport {\n MS_DATE_STRING,\n SEC_DATE_STRING,\n CLEAN_DATE_STRING_RE,\n SPLIT_DATE_STRING,\n TIME_MERIDIAN_SPACE_RE,\n TIME_MERIDIAN_DOTS_RE,\n} from './constants';\n\nexport function cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || [])\n .join(' ')\n .replace(TIME_MERIDIAN_DOTS_RE, 'm')\n .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')\n .replace(CLEAN_DATE_STRING_RE, '$1')\n .trim();\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nexport default function cleanDatePublished(dateString) {\n // If string is in milliseconds or seconds, convert to int\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n dateString = parseInt(dateString, 10);\n }\n\n let date = moment(new Date(dateString));\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = moment(new Date(dateString));\n }\n\n return date.isValid() ? 
date.toISOString() : null;\n}\n","import {\n cleanAttributes,\n cleanHeaders,\n cleanHOnes,\n cleanImages,\n cleanTags,\n removeEmpty,\n rewriteTopLevel,\n markToKeep,\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom';\n\n// Clean our article content, returning a new, cleaned node.\nexport default function extractCleanNode(\n article,\n {\n $,\n cleanConditionally = true,\n title = '',\n url = '',\n defaultCleaner = true,\n }\n) {\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel(article, $);\n\n // Drop small images and spacer images\n // Only do this is defaultCleaner is set to true;\n // this can sometimes be too aggressive.\n if (defaultCleaner) cleanImages(article, $);\n\n // Mark elements to keep that would normally be removed.\n // E.g., stripJunkTags will remove iframes, so we're going to mark\n // YouTube/Vimeo videos as elements we want to keep.\n markToKeep(article, $, url);\n\n // Drop certain tags like , etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. Otherwise, turn 'em into H2s.\n cleanHOnes(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // Make links absolute\n makeLinksAbsolute(article, $, url);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes(article, $);\n\n return article;\n}\n","import { stripTags } from 'utils/dom';\n\nimport { TITLE_SPLITTERS_RE } from './constants';\nimport { resolveSplitTitle } from './index';\n\nexport default function cleanTitle(title, { url, $ }) {\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n const h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return stripTags(title, $).trim();\n}\n","import URL from 'url';\nimport wuzzy from 'wuzzy';\n\nimport {\n TITLE_SPLITTERS_RE,\n DOMAIN_ENDINGS_RE,\n} from './constants';\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n const termCounts = splitTitle.reduce((acc, titleText) => {\n acc[titleText] = acc[titleText] ? 
acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n const [maxTerm, termCount] =\n Reflect.ownKeys(termCounts)\n .reduce((acc, key) => {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]);\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n const splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n const longestEnd = splitEnds.reduce((acc, end) => acc.length > end.length ? acc : end, '');\n\n if (longestEnd.length > 10) {\n return longestEnd;\n }\n\n return text;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. 
Not the end of the world if it doesn't strip right.\n const { host } = URL.parse(url);\n const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n const startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nexport default function resolveSplitTitle(title, url = '') {\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n const splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n let newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n","import cleanAuthor from './author';\nimport cleanImage from './lead-image-url';\nimport cleanDek from './dek';\nimport cleanDatePublished from './date-published';\nimport cleanContent from './content';\nimport cleanTitle from './title';\n\nconst Cleaners = {\n author: cleanAuthor,\n lead_image_url: cleanImage,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: cleanContent,\n title: cleanTitle,\n};\n\nexport default Cleaners;\n\nexport { cleanAuthor };\nexport { cleanImage };\nexport { cleanDek };\nexport { cleanDatePublished };\nexport { cleanContent };\nexport { cleanTitle };\nexport { default as resolveSplitTitle } from 
'./resolve-split-title';\n","import {\n stripUnlikelyCandidates,\n convertToParagraphs,\n} from 'utils/dom';\n\nimport {\n scoreContent,\n findTopCandidate,\n} from './scoring';\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nexport default function extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? – AP\n // let $root = $.root().clone()\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs($);\n $ = scoreContent($, opts.weightNodes);\n const $topCandidate = findTopCandidate($);\n\n return $topCandidate;\n}\n","import cheerio from 'cheerio';\n\nimport { nodeIsSufficient } from 'utils/dom';\nimport { cleanContent } from 'cleaners';\nimport { normalizeSpaces } from 'utils/text';\n\nimport extractBestNode from './extract-best-node';\n\nconst GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true,\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. 
Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. Things like forms, ads, etc.\n extract({ $, html, title, url }, opts) {\n opts = { ...this.defaultOpts, ...opts };\n\n $ = $ || cheerio.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n let node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {\n opts[key] = false;\n $ = cheerio.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n\n return this.cleanAndReturnNode(node, $);\n },\n\n // Get node given current options\n getContentNode($, title, url, opts) {\n return cleanContent(\n extractBestNode($, opts),\n {\n $,\n cleanConditionally: opts.cleanConditionally,\n title,\n url,\n });\n },\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n\n // if return_type == \"html\":\n // return normalize_spaces(node_to_html(node))\n // else:\n // return node\n },\n\n};\n\nexport default GenericContentExtractor;\n","// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. 
From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n 'tweetmeme-title',\n 'dc.title',\n 'rbtitle',\n 'headline',\n 'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nexport const WEAK_TITLE_META_TAGS = [\n 'og:title',\n];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. The\n// speed tradeoff is worth the accuracy hit.\nexport const STRONG_TITLE_SELECTORS = [\n '.hentry .entry-title',\n 'h1#articleHeader',\n 'h1.articleHeader',\n 'h1.article',\n '.instapaper_title',\n '#meebo-title',\n];\n\nexport const WEAK_TITLE_SELECTORS = [\n 'article h1',\n '#entry-title',\n '.entry-title',\n '#entryTitle',\n '#entrytitle',\n '.entryTitle',\n '.entrytitle',\n '#articleTitle',\n '.articleTitle',\n 'post post-title',\n 'h1.title',\n 'h2.article',\n 'h1',\n 'html head title',\n 'title',\n];\n","import { cleanTitle } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n STRONG_TITLE_META_TAGS,\n WEAK_TITLE_META_TAGS,\n STRONG_TITLE_SELECTORS,\n WEAK_TITLE_SELECTORS,\n} from './constants';\n\nconst GenericTitleExtractor = {\n extract({ $, url, metaCache }) {\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n let title;\n\n title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors($, 
STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Last, look for weaker selector tags that may match.\n title = extractFromSelectors($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // If no matches, return an empty string\n return '';\n },\n};\n\nexport default GenericTitleExtractor;\n","// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nexport const AUTHOR_META_TAGS = [\n 'byl',\n 'clmst',\n 'dc.author',\n 'dcsext.author',\n 'dc.creator',\n 'rbauthors',\n 'authors',\n];\n\nexport const AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of XPath Selectors to find likely article authors. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nexport const AUTHOR_SELECTORS = [\n '.entry .entry-author',\n '.author.vcard .fn',\n '.author .vcard .fn',\n '.byline.vcard .fn',\n '.byline .vcard .fn',\n '.byline .by .author',\n '.byline .by',\n '.byline .author',\n '.post-author.vcard',\n '.post-author .vcard',\n 'a[rel=author]',\n '#by_author',\n '.by_author',\n '#entryAuthor',\n '.entryAuthor',\n '.byline a[href*=author]',\n '#author .authorname',\n '.author .authorname',\n '#author',\n '.author',\n '.articleauthor',\n '.ArticleAuthor',\n '.byline',\n];\n\n// An ordered list of Selectors to find likely article authors, with\n// regular expression for content.\nconst bylineRe = /^[\\n\\s]*By/i;\nexport const BYLINE_SELECTORS_RE = [\n ['#byline', bylineRe],\n ['.byline', bylineRe],\n];\n","import { cleanAuthor } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n AUTHOR_META_TAGS,\n AUTHOR_MAX_LENGTH,\n AUTHOR_SELECTORS,\n BYLINE_SELECTORS_RE,\n} from './constants';\n\nconst GenericAuthorExtractor = {\n extract({ $, metaCache }) {\n let author;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors looking for potential authors.\n author = extractFromSelectors($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression based selectors for\n // potential authors.\n for (const [selector, regex] of BYLINE_SELECTORS_RE) {\n const node = $(selector);\n if (node.length === 1) {\n const text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n\n return null;\n },\n};\n\nexport default GenericAuthorExtractor;\n","// An ordered list of meta tag names that denote\n// likely 
date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const DATE_PUBLISHED_META_TAGS = [\n 'article:published_time',\n 'displaydate',\n 'dc.date',\n 'dc.date.issued',\n 'rbpubdate',\n 'publish_date',\n 'pub_date',\n 'pagedate',\n 'pubdate',\n 'revision_date',\n 'doc_date',\n 'date_created',\n 'content_create_date',\n 'lastmodified',\n 'created',\n 'date',\n];\n\n// An ordered list of XPath Selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nexport const DATE_PUBLISHED_SELECTORS = [\n '.hentry .dtstamp.published',\n '.hentry .published',\n '.hentry .dtstamp.updated',\n '.hentry .updated',\n '.single .published',\n '.meta .published',\n '.meta .postDate',\n '.entry-date',\n '.byline .date',\n '.postmetadata .date',\n '.article_datetime',\n '.date-header',\n '.story-date',\n '.dateStamp',\n '#story .datetime',\n '.dateline',\n '.pubdate',\n];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. 
These should always have the first\n// reference be a date string that is parseable by dateutil.parser.parse\nconst abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nexport const DATE_PUBLISHED_URL_RES = [\n // /2012/01/27/ but not /2012/01/293\n new RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n // 20120127 or 20120127T but not 2012012733 or 8201201733\n // /[^0-9](20\\d{2}[01]\\d[0-3]\\d)([^0-9]|$)/i,\n // 2012-01-27\n new RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n // /2012/jan/27/\n new RegExp(`/(20\\\\d{2}/${abbrevMonthsStr}/[0-3]\\\\d)/`, 'i'),\n];\n","import { cleanDatePublished } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\nimport { extractFromUrl } from 'utils/text';\n\nimport {\n DATE_PUBLISHED_META_TAGS,\n DATE_PUBLISHED_SELECTORS,\n DATE_PUBLISHED_URL_RES,\n} from './constants';\n\nconst GenericDatePublishedExtractor = {\n extract({ $, url, metaCache }) {\n let datePublished;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors looking for potential\n // date_published's.\n datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a dately string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n },\n};\n\nexport default GenericDatePublishedExtractor;\n","// import {\n// DEK_META_TAGS,\n// DEK_SELECTORS,\n// DEK_URL_RES,\n// } from './constants';\n\n// import { cleanDek } from 'cleaners';\n\n// import {\n// extractFromMeta,\n// extractFromSelectors,\n// } from 'utils/dom';\n\n// Currently there is only 
one selector for\n// deks. We should simply return null here\n// until we have a more robust generic option.\n// Below is the original source for this, for reference.\nconst GenericDekExtractor = {\n // extract({ $, content, metaCache }) {\n extract() {\n return null;\n },\n};\n\nexport default GenericDekExtractor;\n\n// def extract_dek(self):\n// # First, check to see if we have a matching meta tag that we can make\n// # use of.\n// dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)\n// if not dek:\n// # Second, look through our CSS/XPath selectors. This may return\n// # an HTML fragment.\n// dek = self.extract_from_selectors('dek',\n// constants.DEK_SELECTORS,\n// text_only=False)\n//\n// if dek:\n// # Make sure our dek isn't in the first few thousand characters\n// # of the content, otherwise it's just the start of the article\n// # and not a true dek.\n// content = self.extract_content()\n// content_chunk = normalize_spaces(strip_tags(content[:2000]))\n// dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.\n//\n// # 80% or greater similarity means the dek was very similar to some\n// # of the starting content, so we skip it.\n// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:\n// return dek\n//\n// return None\n","// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const LEAD_IMAGE_URL_META_TAGS = [\n 'og:image',\n 'twitter:image',\n 'image_src',\n];\n\nexport const LEAD_IMAGE_URL_SELECTORS = [\n 'link[rel=image_src]',\n];\n\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS = [\n 'upload',\n 'wp-content',\n 'large',\n 'photo',\n 'wp-image',\n];\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS = [\n 'spacer',\n 'sprite',\n 'blank',\n 'throbber',\n 'gradient',\n 'tile',\n 'bg',\n 
'background',\n 'icon',\n 'social',\n 'header',\n 'hdr',\n 'advert',\n 'spinner',\n 'loader',\n 'loading',\n 'default',\n 'rating',\n 'share',\n 'facebook',\n 'twitter',\n 'theme',\n 'promo',\n 'ads',\n 'wp-includes',\n];\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const GIF_RE = /\\.gif(\\?.*)?$/i;\nexport const JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n","import {\n POSITIVE_LEAD_IMAGE_URL_HINTS_RE,\n NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,\n GIF_RE,\n JPG_RE,\n} from './constants';\n\nimport { PHOTO_HINTS_RE } from '../content/scoring/constants';\n\nfunction getSig($node) {\n return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;\n}\n\n// Scores image urls based on a variety of heuristics.\nexport function scoreImageUrl(url) {\n url = url.trim();\n let score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nexport function scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nexport function scoreByParents($img) {\n let score = 0;\n const $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n const $parent = $img.parent();\n let $gParent;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach(($node) => {\n if (PHOTO_HINTS_RE.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// 
caption. Bonus if so.\nexport function scoreBySibling($img) {\n let score = 0;\n const $sibling = $img.next();\n const sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nexport function scoreByDimensions($img) {\n let score = 0;\n\n const width = parseFloat($img.attr('width'));\n const height = parseFloat($img.attr('height'));\n const src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n const area = width * height;\n if (area < 5000) { // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nexport function scoreByPosition($imgs, index) {\n return ($imgs.length / 2) - index;\n}\n","import { extractFromMeta } from 'utils/dom';\nimport { cleanImage } from 'cleaners';\n\nimport {\n LEAD_IMAGE_URL_META_TAGS,\n LEAD_IMAGE_URL_SELECTORS,\n} from './constants';\n\nimport {\n scoreImageUrl,\n scoreAttr,\n scoreByParents,\n scoreBySibling,\n scoreByDimensions,\n scoreByPosition,\n} from './score-image';\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nconst GenericLeadImageUrlExtractor = {\n extract({ $, content, metaCache }) {\n let cleanUrl;\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n const imageUrl =\n extractFromMeta(\n $,\n LEAD_IMAGE_URL_META_TAGS,\n metaCache,\n false\n );\n\n if (imageUrl) {\n cleanUrl = cleanImage(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n const imgs = $('img', content).toArray();\n const imgScores = {};\n\n imgs.forEach((img, index) => {\n const $img = $(img);\n const src = $img.attr('src');\n\n if (!src) return;\n\n let score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n const [topUrl, topScore] =\n Reflect.ownKeys(imgScores).reduce((acc, key) =>\n imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc\n , [null, 0]);\n\n if (topScore > 0) {\n cleanUrl = cleanImage(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n for (const selector of LEAD_IMAGE_URL_SELECTORS) {\n const $node = $(selector).first();\n const src = $node.attr('src');\n if (src) {\n cleanUrl = cleanImage(src);\n if (cleanUrl) return cleanUrl;\n }\n\n const href = $node.attr('href');\n if (href) {\n cleanUrl = cleanImage(href);\n if (cleanUrl) return cleanUrl;\n }\n\n const value = $node.attr('value');\n if (value) {\n cleanUrl = cleanImage(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n\n return null;\n },\n};\n\nexport default GenericLeadImageUrlExtractor;\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. 
Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. 
Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n","import difflib from 'difflib';\n\nexport default function scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. 
On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n const diffPercent = 1.0 - similarity;\n const diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n","import { IS_DIGIT_RE } from 'utils/text/constants';\n\nexport default function scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n let score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n const linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n","export default function scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. 
Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n","export const DIGIT_RE = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nexport const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n","import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { range } from 'utils';\nimport {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PAGE_RE,\n} from 'utils/dom/constants';\nimport { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nfunction makeSig($link) {\n return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default 
function scoreByParents($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n let $parent = $link.parent();\n let positiveMatch = false;\n let negativeMatch = false;\n let score = 0;\n\n Array.from(range(0, 4)).forEach(() => {\n if ($parent.length === 0) {\n return;\n }\n\n const parentData = makeSig($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)\n && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n","import { PREV_LINK_TEXT_RE } from '../constants';\n\nexport default function scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n DIGIT_RE,\n EXTRANEOUS_LINK_HINTS_RE,\n} from '../constants';\n\nexport default function shouldScore(\n href,\n articleUrl,\n baseUrl,\n parsedUrl,\n linkText,\n previousUrls\n) {\n // skip if we've already fetched this url\n if (previousUrls.find(url => href === url) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n const { hostname } = parsedUrl;\n const { hostname: linkHost } = URL.parse(href);\n\n // Domain mismatch.\n if (linkHost !== hostname) {\n return false;\n 
}\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n const fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n","export default function scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { NEXT_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n","import {\n NEXT_LINK_TEXT_RE,\n CAP_LINK_TEXT_RE,\n} from '../constants';\n\nexport default function scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. 
If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport { isWordpress } from 'utils/dom';\nimport {\n removeAnchor,\n pageNumFromUrl,\n} from 'utils/text';\n\nimport {\n scoreSimilarity,\n scoreLinkText,\n scorePageInLink,\n scoreExtraneousLinks,\n scoreByParents,\n scorePrevLink,\n shouldScore,\n scoreBaseUrl,\n scoreCapLinks,\n scoreNextLinkText,\n} from './utils';\n\nexport function makeBaseRegex(baseUrl) {\n return new RegExp(`^${baseUrl}`, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls = [],\n}) {\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n const baseRegex = makeBaseRegex(baseUrl);\n const isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n const scoredPages = links.reduce((possiblePages, link) => {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n const href = removeAnchor(link.attribs.href);\n const $link = $(link);\n const linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. 
##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText,\n href,\n };\n } else {\n possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`;\n }\n\n const possiblePage = possiblePages[href];\n const linkData = makeSig($link, linkText);\n const pageNum = pageNumFromUrl(href);\n\n let score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return Reflect.ownKeys(scoredPages).length === 0 ? null : scoredPages;\n}\n","import URL from 'url';\n\nimport {\n articleBaseUrl,\n removeAnchor,\n} from 'utils/text';\nimport scoreLinks from './scoring/score-links';\n\n// Looks for and returns next page url\n// for multi-page articles\nconst GenericNextPageUrlExtractor = {\n extract({ $, url, parsedUrl, previousUrls = [] }) {\n parsedUrl = parsedUrl || URL.parse(url);\n\n const articleUrl = removeAnchor(url);\n const baseUrl = articleBaseUrl(url, parsedUrl);\n\n const links = $('a[href]').toArray();\n\n const scoredLinks = scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls,\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) => {\n const scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? 
scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n },\n};\n\nexport default GenericNextPageUrlExtractor;\n","export const CANONICAL_META_SELECTORS = [\n 'og:url',\n];\n","import URL from 'url';\nimport { extractFromMeta } from 'utils/dom';\n\nimport { CANONICAL_META_SELECTORS } from './constants';\n\nfunction parseDomain(url) {\n const parsedUrl = URL.parse(url);\n const { hostname } = parsedUrl;\n return hostname;\n}\n\nfunction result(url) {\n return {\n url,\n domain: parseDomain(url),\n };\n}\n\nconst GenericUrlExtractor = {\n extract({ $, url, metaCache }) {\n const $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n const href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n const metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n },\n\n};\n\nexport default GenericUrlExtractor;\n","export const EXCERPT_META_SELECTORS = [\n 'og:description',\n 'twitter:description',\n];\n","import ellipsize from 'ellipsize';\n\nimport {\n extractFromMeta,\n stripTags,\n} from 'utils/dom';\n\nimport { EXCERPT_META_SELECTORS } from './constants';\n\nexport function clean(content, $, maxLength = 200) {\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nconst GenericExcerptExtractor = {\n extract({ $, content, metaCache }) {\n const excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n const maxLength = 200;\n const shortContent = content.slice(0, maxLength * 5);\n return clean($(shortContent).text(), $, maxLength);\n },\n};\n\nexport default GenericExcerptExtractor;\n","import cheerio from 
'cheerio';\n\nimport { normalizeSpaces } from 'utils/text';\n\nconst GenericWordCountExtractor = {\n extract({ content }) {\n const $ = cheerio.load(content);\n\n const text = normalizeSpaces($('div').first().text());\n return text.split(/\\s/).length;\n },\n};\n\nexport default GenericWordCountExtractor;\n","import cheerio from 'cheerio';\nimport stringDirection from 'string-direction';\n\nimport GenericContentExtractor from './content/extractor';\nimport GenericTitleExtractor from './title/extractor';\nimport GenericAuthorExtractor from './author/extractor';\nimport GenericDatePublishedExtractor from './date-published/extractor';\nimport GenericDekExtractor from './dek/extractor';\nimport GenericLeadImageUrlExtractor from './lead-image-url/extractor';\nimport GenericNextPageUrlExtractor from './next-page-url/extractor';\nimport GenericUrlExtractor from './url/extractor';\nimport GenericExcerptExtractor from './excerpt/extractor';\nimport GenericWordCountExtractor from './word-count/extractor';\n\nconst GenericExtractor = {\n // This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: ({ title }) => stringDirection.getDirection(title),\n\n extract(options) {\n const { html } = options;\n\n if (html) {\n const $ = cheerio.load(html);\n options.$ = $;\n }\n\n const title = this.title(options);\n const date_published = this.date_published(options);\n const author = this.author(options);\n const content = this.content({ ...options, title });\n const 
lead_image_url = this.lead_image_url({ ...options, content });\n const dek = this.dek({ ...options, content });\n const next_page_url = this.next_page_url(options);\n const excerpt = this.excerpt({ ...options, content });\n const word_count = this.word_count({ ...options, content });\n const direction = this.direction({ title });\n const { url, domain } = this.url_and_domain(options);\n\n return {\n title,\n author,\n date_published: date_published || null,\n dek,\n lead_image_url,\n content,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default GenericExtractor;\n","import URL from 'url';\n\nimport Extractors from './all';\nimport GenericExtractor from './generic';\n\nexport default function getExtractor(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(url);\n const { hostname } = parsedUrl;\n const baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;\n}\n","import Cleaners from 'cleaners';\nimport { convertNodeTo } from 'utils/dom';\nimport GenericExtractor from './generic';\n\n// Remove elements by an array of selectors\nexport function cleanBySelectors($content, $, { clean }) {\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nexport function transformElements($content, $, { transforms }) {\n if (!transforms) return $content;\n\n Reflect.ownKeys(transforms).forEach((key) => {\n const $matches = $(key, $content);\n const value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each((index, node) => {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each((index, node) => {\n const result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof 
result === 'string') {\n convertNodeTo($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors) {\n return selectors.find((selector) => {\n if (Array.isArray(selector)) {\n const [s, attr] = selector;\n return $(s).length === 1 && $(s).attr(attr) && $(s).attr(attr).trim() !== '';\n }\n\n return $(selector).length === 1 && $(selector).text().trim() !== '';\n });\n}\n\nexport function select(opts) {\n const { $, type, extractionOpts, extractHtml = false } = opts;\n // Skip if there's not extraction for this type\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n const { selectors, defaultCleaner = true } = extractionOpts;\n\n const matchingSelector = findMatchingSelector($, selectors);\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n if (extractHtml) {\n let $content = $(matchingSelector);\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n $content = Cleaners[type]($content, { ...opts, defaultCleaner });\n\n return $.html($content);\n }\n\n let result;\n\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n const [selector, attr] = matchingSelector;\n result = $(selector).attr(attr).trim();\n } else {\n result = $(matchingSelector).text().trim();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return 
Cleaners[type](result, opts);\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n const { type, extractor, fallback = true } = opts;\n\n const result = select({ ...opts, extractionOpts: extractor[type] });\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nconst RootExtractor = {\n extract(extractor = GenericExtractor, opts) {\n const { contentOnly, extractedTitle } = opts;\n // This is the generic extractor. Run its extract method\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = {\n ...opts,\n extractor,\n };\n\n if (contentOnly) {\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title: extractedTitle,\n });\n return {\n content,\n };\n }\n const title = extractResult({ ...opts, type: 'title' });\n const date_published = extractResult({ ...opts, type: 'date_published' });\n const author = extractResult({ ...opts, type: 'author' });\n const next_page_url = extractResult({ ...opts, type: 'next_page_url' });\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title,\n });\n const lead_image_url = extractResult({ ...opts, type: 'lead_image_url', content });\n const excerpt = extractResult({ ...opts, type: 'excerpt', content });\n const dek = extractResult({ ...opts, type: 'dek', content, excerpt });\n const word_count = extractResult({ ...opts, type: 'word_count', content });\n const direction = extractResult({ ...opts, type: 'direction', title });\n const { url, domain } =\n extractResult({ ...opts, type: 'url_and_domain' }) || { url: null, domain: null };\n\n return {\n title,\n content,\n author,\n date_published,\n lead_image_url,\n dek,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default 
RootExtractor;\n","import { removeAnchor } from 'utils/text';\nimport RootExtractor from 'extractors/root-extractor';\nimport GenericExtractor from 'extractors/generic';\nimport Resource from 'resource';\n\nexport default async function collectAllPages(\n {\n next_page_url,\n html,\n $,\n metaCache,\n result,\n Extractor,\n title,\n url,\n }\n) {\n // At this point, we've fetched just the first page\n let pages = 1;\n const previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n while (next_page_url && pages < 26) {\n pages += 1;\n $ = await Resource.create(next_page_url);\n html = $.html();\n\n const extractorOpts = {\n url: next_page_url,\n html,\n $,\n metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls,\n };\n\n const nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n previousUrls.push(next_page_url);\n result = {\n ...result,\n content: `\n ${result.content}\n \n
` });\n return {\n ...result,\n total_pages: pages,\n pages_rendered: pages,\n word_count,\n };\n}\n","import URL from 'url';\n\nimport Resource from 'resource';\nimport {\n validateUrl,\n Errors,\n} from 'utils';\nimport getExtractor from 'extractors/get-extractor';\nimport RootExtractor from 'extractors/root-extractor';\nimport collectAllPages from 'extractors/collect-all-pages';\n\nconst Mercury = {\n async parse(url, html, opts = {}) {\n const {\n fetchAllPages = true,\n fallback = true,\n } = opts;\n\n const parsedUrl = URL.parse(url);\n\n if (!validateUrl(parsedUrl)) {\n return Errors.badUrl;\n }\n\n const Extractor = getExtractor(url, parsedUrl);\n // console.log(`Using extractor for ${Extractor.domain}`);\n\n const $ = await Resource.create(url, html, parsedUrl);\n\n // If we found an error creating the resource, return that error\n if ($.error) {\n return $;\n }\n\n html = $.html();\n\n // Cached value of every meta name in our document.\n // Used when extracting title/author/date_published/dek\n const metaCache = $('meta').map((_, node) => $(node).attr('name')).toArray();\n\n let result = RootExtractor.extract(Extractor, { url, html, $, metaCache, parsedUrl, fallback });\n const { title, next_page_url } = result;\n\n // Fetch more pages if next_page_url found\n if (fetchAllPages && next_page_url) {\n result = await collectAllPages(\n {\n Extractor,\n next_page_url,\n html,\n $,\n metaCache,\n result,\n title,\n url,\n }\n );\n } else {\n result = {\n ...result,\n total_pages: 1,\n rendered_pages: 1,\n };\n }\n\n return result;\n },\n\n // A convenience method for getting a resource\n // to work with, e.g., for custom extractor generator\n async fetchResource(url) {\n return await Resource.create(url);\n },\n\n};\n\nexport default 
Mercury;\n"],"names":["range","start","end","validateUrl","hostname","Errors","REQUEST_HEADERS","FETCH_TIMEOUT","BAD_CONTENT_TYPES","BAD_CONTENT_TYPES_RE","RegExp","join","MAX_CONTENT_LENGTH","get","options","resolve","reject","err","response","body","validateResponse","parseNon2xx","statusMessage","statusCode","Error","error","headers","contentType","contentLength","test","url","parsedUrl","URL","parse","encodeURI","badUrl","fetchResource","convertMetaProp","$","from","to","each","_","node","$node","value","attr","removeAttr","normalizeMetaTags","IS_LINK","IS_IMAGE","TAGS_TO_REMOVE","convertLazyLoadedImages","img","attribs","forEach","isComment","index","type","cleanComments","root","find","contents","filter","remove","clean","Resource","preparedResponse","validResponse","result","generateDoc","content","includes","cheerio","load","normalizeWhitespace","children","length","merge","extractor","domains","reduce","acc","domain","mergeSupportedDomains","supportedDomains","BloggerExtractor","NYMagExtractor","$children","tagName","WikipediaExtractor","$parent","parents","prepend","TwitterExtractor","tweets","$tweetContainer","append","replaceWith","NYTimesExtractor","src","width","replace","TheAtlanticExtractor","NewYorkerExtractor","WiredExtractor","MSNExtractor","YahooExtractor","BuzzfeedExtractor","WikiaExtractor","LittleThingsExtractor","PoliticoExtractor","DeadspinExtractor","youtubeId","split","BroadwayWorldExtractor","ApartmentTherapyExtractor","data","JSON","sources","$img","MediumExtractor","ytRe","thumb","decodeURIComponent","match","clone","CustomExtractors","key","SPACER_RE","KEEP_CLASS","KEEP_SELECTORS","STRIP_OUTPUT_TAGS","REMOVE_ATTRS","REMOVE_ATTR_SELECTORS","map","selector","REMOVE_ATTR_LIST","WHITELIST_ATTRS","WHITELIST_ATTRS_RE","REMOVE_EMPTY_TAGS","REMOVE_EMPTY_SELECTORS","tag","CLEAN_CONDITIONALLY_TAGS","HEADER_TAGS","HEADER_TAG_LIST","UNLIKELY_CANDIDATES_BLACKLIST","UNLIKELY_CANDIDATES_WHITELIST","DIV_TO_P_BLOCK_TAGS","POSITIVE_SCORE_HINTS","POSITIV
E_SCORE_RE","NEGATIVE_SCORE_HINTS","NEGATIVE_SCORE_RE","IS_WP_SELECTOR","PAGE_RE","BLOCK_LEVEL_TAGS","BLOCK_LEVEL_TAGS_RE","candidatesBlacklist","CANDIDATES_BLACKLIST","candidatesWhitelist","CANDIDATES_WHITELIST","stripUnlikelyCandidates","not","classes","id","classAndId","brsToPs","collapsing","element","nextElement","next","paragraphize","br","sibling","nextSibling","p","appendTo","convertDivs","div","$div","convertable","convertSpans","span","$span","convertToParagraphs","convertNodeTo","attribString","cleanForHeight","height","parseInt","removeSpacers","cleanImages","$article","markToKeep","article","tags","protocol","addClass","stripJunkTags","removeClass","cleanHOnes","$hOnes","removeAllButWhitelist","cleanAttributes","parent","removeEmpty","$p","text","trim","NON_TOP_CANDIDATE_TAGS","NON_TOP_CANDIDATE_TAGS_RE","HNEWS_CONTENT_SELECTORS","PHOTO_HINTS","PHOTO_HINTS_RE","READABILITY_ASSET","DIGIT_RE","BR_TAGS_RE","BR_TAG_RE","UNLIKELY_RE","PARAGRAPH_SCORE_TAGS","CHILD_CONTENT_TAGS","BAD_TAGS","HTML_OR_BODY_RE","getWeight","score","getScore","parseFloat","scoreCommas","idkRe","scoreLength","textLength","chunks","lengthBonus","Math","min","max","scoreParagraph","slice","setScore","addScore","amount","getOrInitScore","e","addToParent","weightNodes","scoreNode","addScoreTo","scorePs","rawScore","scoreContent","parentSelector","childSelector","NORMALIZE_RE","normalizeSpaces","extractFromUrl","regexList","matchRe","re","exec","PAGE_IN_HREF_RE","HAS_ALPHA_RE","IS_ALPHA_RE","IS_DIGIT_RE","pageNumFromUrl","matches","pageNum","removeAnchor","isGoodSegment","segment","firstSegmentHasLetters","goodSegment","toLowerCase","articleBaseUrl","parsed","host","path","cleanedSegments","reverse","rawSegment","possibleSegment","fileExt","push","SENTENCE_END_RE","hasSentenceEnd","excerptContent","words","mergeSiblings","$candidate","topScore","siblingScoreThreshold","wrappingDiv","$sibling","siblingScore","contentBonus","density","linkDensity","newScore","siblingContent","siblingConten
tLength","findTopCandidate","first","removeUnlessContent","weight","hasClass","pCount","inputCount","imgCount","nodeIsList","previousNode","prev","scriptCount","cleanTags","cleanHeaders","title","header","$header","prevAll","rewriteTopLevel","absolutize","rootUrl","$content","absoluteUrl","makeLinksAbsolute","totalTextLength","linkText","linkLength","extractFromMeta","metaNames","cachedNames","foundNames","indexOf","name","nodes","values","toArray","metaValue","stripTags","isGoodNode","maxChildren","withinComment","extractFromSelectors","selectors","textOnly","html","cleanText","commentParent","class","undefined","nodeIsSufficient","isWordpress","CLEAN_AUTHOR_RE","TEXT_LINK_RE","MS_DATE_STRING","SEC_DATE_STRING","CLEAN_DATE_STRING_RE","TIME_MERIDIAN_SPACE_RE","TIME_MERIDIAN_DOTS_RE","months","allMonths","timestamp1","timestamp2","SPLIT_DATE_STRING","TITLE_SPLITTERS_RE","DOMAIN_ENDINGS_RE","cleanAuthor","author","leadImageUrl","validUrl","isWebUri","cleanDek","dek","excerpt","dekText","cleanDateString","dateString","cleanDatePublished","date","moment","Date","isValid","toISOString","extractCleanNode","cleanConditionally","defaultCleaner","cleanTitle","resolveSplitTitle","h1","extractBreadcrumbTitle","splitTitle","termCounts","titleText","maxTerm","termCount","splitEnds","longestEnd","cleanDomainFromTitle","nakedDomain","startSlug","startSlugRatio","wuzzy","levenshtein","endSlug","endSlugRatio","newTitle","Cleaners","cleanImage","cleanContent","extractBestNode","opts","$topCandidate","GenericContentExtractor","defaultOpts","getContentNode","cleanAndReturnNode","k","STRONG_TITLE_META_TAGS","WEAK_TITLE_META_TAGS","STRONG_TITLE_SELECTORS","WEAK_TITLE_SELECTORS","GenericTitleExtractor","metaCache","AUTHOR_META_TAGS","AUTHOR_MAX_LENGTH","AUTHOR_SELECTORS","bylineRe","BYLINE_SELECTORS_RE","GenericAuthorExtractor","regex","DATE_PUBLISHED_META_TAGS","DATE_PUBLISHED_SELECTORS","abbrevMonthsStr","DATE_PUBLISHED_URL_RES","GenericDatePublishedExtractor","datePublished","GenericDe
kExtractor","LEAD_IMAGE_URL_META_TAGS","LEAD_IMAGE_URL_SELECTORS","POSITIVE_LEAD_IMAGE_URL_HINTS","POSITIVE_LEAD_IMAGE_URL_HINTS_RE","NEGATIVE_LEAD_IMAGE_URL_HINTS","NEGATIVE_LEAD_IMAGE_URL_HINTS_RE","GIF_RE","JPG_RE","getSig","scoreImageUrl","scoreAttr","scoreByParents","$figParent","$gParent","scoreBySibling","scoreByDimensions","area","round","scoreByPosition","$imgs","GenericLeadImageUrlExtractor","cleanUrl","imageUrl","imgs","imgScores","topUrl","href","scoreSimilarity","articleUrl","similarity","difflib","SequenceMatcher","ratio","diffPercent","diffModifier","scoreLinkText","linkTextAsNum","scorePageInLink","isWp","EXTRANEOUS_LINK_HINTS","EXTRANEOUS_LINK_HINTS_RE","NEXT_LINK_TEXT_RE","CAP_LINK_TEXT_RE","PREV_LINK_TEXT_RE","scoreExtraneousLinks","makeSig","$link","positiveMatch","negativeMatch","parentData","scorePrevLink","linkData","shouldScore","baseUrl","previousUrls","linkHost","fragment","scoreBaseUrl","baseRegex","scoreNextLinkText","scoreCapLinks","makeBaseRegex","scoreLinks","links","scoredPages","possiblePages","link","possiblePage","GenericNextPageUrlExtractor","scoredLinks","topPage","scoredLink","CANONICAL_META_SELECTORS","parseDomain","GenericUrlExtractor","$canonical","metaUrl","EXCERPT_META_SELECTORS","maxLength","ellipsize","ellipse","GenericExcerptExtractor","shortContent","GenericWordCountExtractor","GenericExtractor","extract","bind","stringDirection","getDirection","date_published","lead_image_url","next_page_url","word_count","direction","url_and_domain","getExtractor","baseDomain","Extractors","cleanBySelectors","transformElements","transforms","$matches","findMatchingSelector","Array","isArray","s","select","extractionOpts","extractHtml","matchingSelector","wrap","extractResult","fallback","RootExtractor","contentOnly","extractedTitle","Extractor","pages","create","extractorOpts","nextPageResult","collectAllPages","Mercury","fetchAllPages"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;eAAyBA;;AAAzB,AAAe,SAAUA,KAAV;MAAgBC,KAAhB,uEAAwB,CAAxB;MAA2
BC,GAA3B,uEAAiC,CAAjC;;;;;gBACND,SAASC,GADH;;;;;;iBAELD,SAAS,CAFJ;;;;;;;;;;;;;;ACAf;AACA,AAAe,SAASE,WAAT,OAAmC;MAAZC,QAAY,QAAZA,QAAY;;;SAEzC,CAAC,CAACA,QAAT;;;ACHF,IAAMC,SAAS;UACL;WACC,IADD;cAEI;;CAHd,CAOA;;ACPO,IAAMC,kBAAkB;gBACf;CADT;;;AAKP,AAAO,IAAMC,gBAAgB,KAAtB;;;AAGP,IAAMC,oBAAoB,CACxB,YADwB,EAExB,WAFwB,EAGxB,YAHwB,EAIxB,WAJwB,CAA1B;;AAOA,AAAO,IAAMC,uBAAuB,IAAIC,MAAJ,QAAgBF,kBAAkBG,IAAlB,CAAuB,GAAvB,CAAhB,SAAiD,GAAjD,CAA7B;;;;AAIP,AAAO,IAAMC,qBAAqB,OAA3B;;;;qCAKP,AAAO,AACP,AAAO,AAKP,AAAO;;ACnBP,SAASC,GAAT,CAAaC,OAAb,EAAsB;SACb,aAAY,UAACC,OAAD,EAAUC,MAAV,EAAqB;YAC9BF,OAAR,EAAiB,UAACG,GAAD,EAAMC,QAAN,EAAgBC,IAAhB,EAAyB;UACpCF,GAAJ,EAAS;eACAA,GAAP;OADF,MAEO;gBACG,EAAEE,UAAF,EAAQD,kBAAR,EAAR;;KAJJ;GADK,CAAP;;;;;;;;AAgBF,AAAO,SAASE,gBAAT,CAA0BF,QAA1B,EAAyD;MAArBG,WAAqB,uEAAP,KAAO;;;;;;;;MAQ3DH,SAASI,aAAT,IAA0BJ,SAASI,aAAT,KAA2B,IAAtD,IACEJ,SAASK,UAAT,KAAwB,GAF5B,EAGE;QACI,CAACL,SAASK,UAAd,EAA0B;YAClB,IAAIC,KAAJ,sDAC+CN,SAASO,KADxD,CAAN;KADF,MAIO,IAAI,CAACJ,WAAL,EAAkB;YACjB,IAAIG,KAAJ,kDAC2CN,SAASK,UADpD,wEAAN;;;;0BASAL,SAASQ,OAzBiD;MAuB5CC,WAvB4C,qBAuB5D,cAvB4D;MAwB1CC,aAxB0C,qBAwB5D,gBAxB4D;;;;MA4B1DnB,qBAAqBoB,IAArB,CAA0BF,WAA1B,CAAJ,EAA4C;UACpC,IAAIH,KAAJ,yCACkCG,WADlC,0BAAN;;;;MAMEC,gBAAgBhB,kBAApB,EAAwC;UAChC,IAAIY,KAAJ,yEACkEZ,kBADlE,OAAN;;;SAKK,IAAP;;;;;AAKF,AAAO;;;;;;;;AAUP;yDAAe,iBAA6BkB,GAA7B,EAAkCC,SAAlC;;;;;;;wBACDA,aAAaC,IAAIC,KAAJ,CAAUC,UAAUJ,GAAV,CAAV,CAAzB;;mBADa,GAGG;mBACTC,SADS;oCAEAzB,eAAd,CAFc;uBAGLC,aAHK;;;wBAMJ,IANI;;mBAQT,IARS;;oBAUR,IAVQ;;kCAYM;aAfT;;mBAkBoBM,IAAIC,OAAJ,CAlBpB;;;;oBAAA,SAkBLI,QAlBK;gBAAA,SAkBKC,IAlBL;;;6BAqBMD,QAAjB;6CACO;wBAAA;;aAtBI;;;;;6CA2BJb,OAAO8B,MA3BH;;;;;;;;GAAf;;WAA8BC,aAA9B;;;;SAA8BA,aAA9B;;;ACpFA,SAASC,eAAT,CAAyBC,CAAzB,EAA4BC,IAA5B,EAAkCC,EAAlC,EAAsC;cAC1BD,IAAV,QAAmBE,IAAnB,CAAwB,UAACC,CAAD,EAAIC,IAAJ,EAAa;QAC7BC,QAAQN,EAAEK,IAAF,CAAd;;QAEME,QAAQD,MAAME,IAAN,CAAWP,IAAX,CAAd;UACMO,IAAN,CAAWN,EAAX,EAAeK,KAAf;UACME,UAAN,CAAiBR,IAAjB;GALF;;SAQOD,CAAP;;;;;;;;;;AAUF,AAAe,SAASU,iBAAT,CAA2BV,CAA3B,EAA8B;MACvCD,gBAAgBC,
CAAhB,EAAmB,SAAnB,EAA8B,OAA9B,CAAJ;MACID,gBAAgBC,CAAhB,EAAmB,UAAnB,EAA+B,MAA/B,CAAJ;SACOA,CAAP;;;ACtBK,IAAMW,UAAU,IAAIvC,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAhB;AACP,AAAO,IAAMwC,WAAW,IAAIxC,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB;;AAEP,AAAO,IAAMyC,iBAAiB,CAC5B,QAD4B,EAE5B,OAF4B,EAG5B,MAH4B,EAI5BxC,IAJ4B,CAIvB,GAJuB,CAAvB;;ACEP;;;;;AAKA,AAAe,SAASyC,uBAAT,CAAiCd,CAAjC,EAAoC;IAC/C,KAAF,EAASG,IAAT,CAAc,UAACC,CAAD,EAAIW,GAAJ,EAAY;qBACRA,IAAIC,OAApB,EAA6BC,OAA7B,CAAqC,UAACT,IAAD,EAAU;UACvCD,QAAQQ,IAAIC,OAAJ,CAAYR,IAAZ,CAAd;;UAEIA,SAAS,KAAT,IAAkBG,QAAQpB,IAAR,CAAagB,KAAb,CAAlB,IACAK,SAASrB,IAAT,CAAcgB,KAAd,CADJ,EAC0B;UACtBQ,GAAF,EAAOP,IAAP,CAAY,KAAZ,EAAmBD,KAAnB;;KALJ;GADF;;SAWOP,CAAP;;;ACpBF,SAASkB,SAAT,CAAmBC,KAAnB,EAA0Bd,IAA1B,EAAgC;SACvBA,KAAKe,IAAL,KAAc,SAArB;;;AAGF,SAASC,aAAT,CAAuBrB,CAAvB,EAA0B;IACtBsB,IAAF,GAASC,IAAT,CAAc,GAAd,EACSC,QADT,GAESC,MAFT,CAEgBP,SAFhB,EAGSQ,MAHT;;SAKO1B,CAAP;;;AAGF,AAAe,SAAS2B,KAAT,CAAe3B,CAAf,EAAkB;IAC7Ba,cAAF,EAAkBa,MAAlB;;MAEIL,cAAcrB,CAAd,CAAJ;SACOA,CAAP;;;ACVF,IAAM4B,WAAW;;;;;;;;QAAA,kBAQFpC,GARE,EAQGqC,gBARH,EAQqBpC,SARrB,EAQgC;;;;;;;;;oBAAA;;mBAGzCoC,gBAHyC;;;;;2BAAA,GAIrB;+BACL,IADK;4BAER,GAFQ;yBAGX;kCACS,WADT;oCAEW;;eATqB;;;uBAalC,EAAEhD,MAAMgD,gBAAR,EAA0BjD,UAAUkD,aAApC,EAAT;;;;;;qBAEehC,gBAAcN,GAAd,EAAmBC,SAAnB,CAf4B;;;oBAAA;;;mBAkBzCsC,OAAO5C,KAlBkC;;;;;+CAmBpC4C,MAnBoC;;;+CAsBtC,MAAKC,WAAL,CAAiBD,MAAjB,CAtBsC;;;;;;;;;GARhC;aAAA,6BAiC0B;QAArBE,OAAqB,QAA3BpD,IAA2B;QAAZD,QAAY,QAAZA,QAAY;QACfS,WADe,GACCT,SAASQ,OADV,CAC/B,cAD+B;;;;;QAKnC,CAACC,YAAY6C,QAAZ,CAAqB,MAArB,CAAD,IACA,CAAC7C,YAAY6C,QAAZ,CAAqB,MAArB,CADL,EACmC;YAC3B,IAAIhD,KAAJ,CAAU,qCAAV,CAAN;;;QAGEc,IAAImC,QAAQC,IAAR,CAAaH,OAAb,EAAsB,EAAEI,qBAAqB,IAAvB,EAAtB,CAAR;;QAEIrC,EAAEsB,IAAF,GAASgB,QAAT,GAAoBC,MAApB,KAA+B,CAAnC,EAAsC;YAC9B,IAAIrD,KAAJ,CAAU,kCAAV,CAAN;;;QAGEwB,kBAAkBV,CAAlB,CAAJ;QACIc,wBAAwBd,CAAxB,CAAJ;QACI2B,MAAM3B,CAAN,CAAJ;;WAEOA,CAAP;;CArDJ,CAyDA;;AClEA,IAAMwC,QAAQ,SAARA,KAAQ,CAACC,SAAD,EAAYC,OAAZ;SACZA,QAAQC,MAAR,CAAe,UAACC,GAAD,EAAMC,MAAN,EAAiB;QAC1BA,MAAJ,IAAcJ,SAAd;WA
COG,GAAP;GAFF,EAGG,EAHH,CADY;CAAd;;AAOA,AAAe,SAASE,qBAAT,CAA+BL,SAA/B,EAA0C;SAChDA,UAAUM,gBAAV,GACLP,MAAMC,SAAN,GAAkBA,UAAUI,MAA5B,4BAAuCJ,UAAUM,gBAAjD,GADK,GAGLP,MAAMC,SAAN,EAAiB,CAACA,UAAUI,MAAX,CAAjB,CAHF;;;ACRK,IAAMG,mBAAmB;UACtB,cADsB;WAErB;;;;eAII,CACT,wBADS,CAJJ;;;WASA,EATA;;;gBAaK;gBACA;;GAhBgB;;UAoBtB;eACK,CACT,mBADS;GArBiB;;SA0BvB;eACM,CACT,gBADS;GA3BiB;;kBAgCd;eACH,CACT,kBADS;;CAjCR;;ACAA,IAAMC,iBAAiB;UACpB,WADoB;WAEnB;;eAEI,CACT,qBADS,EAET,cAFS,EAGT,iBAHS,CAFJ;;;WASA,CACL,KADK,EAEL,uBAFK,CATA;;;;;;;;gBAoBK;;UAEN,IAFM;;;gBAKA,kBAAC3C,KAAD,EAAW;YACb4C,YAAY5C,MAAMgC,QAAN,EAAlB;YACIY,UAAUX,MAAV,KAAqB,CAArB,IAA0BW,UAAU3E,GAAV,CAAc,CAAd,EAAiB4E,OAAjB,KAA6B,KAA3D,EAAkE;iBACzD,QAAP;;;eAGK,IAAP;;;GAjCsB;;SAsCrB;eACM,CACT,uBADS,EAET,qBAFS,EAGT,IAHS;GAvCe;;UA8CpB;eACK,CACT,aADS,EAET,sBAFS;GA/Ce;;OAqDvB;eACQ,CACT,sBADS;GAtDe;;kBA2DZ;eACH,CACT,CAAC,kCAAD,EAAqC,UAArC,CADS,EAET,wBAFS;;CA5DR;;ACAA,IAAMC,qBAAqB;UACxB,eADwB;WAEvB;eACI,CACT,kBADS,CADJ;;oBAKS,KALT;;;gBAQK;sBACM,oBAAC9C,KAAD,EAAW;YACnB+C,UAAU/C,MAAMgD,OAAN,CAAc,UAAd,CAAhB;;YAEID,QAAQf,QAAR,CAAiB,KAAjB,EAAwBC,MAAxB,KAAmC,CAAvC,EAA0C;kBAChCgB,OAAR,CAAgBjD,KAAhB;;OALM;0BAQU,YARV;kBASE;KAjBP;;;WAqBA,CACL,iBADK,EAEL,oCAFK,EAGL,MAHK,EAIL,SAJK;;GAvBuB;;UAgCxB,wBAhCwB;;SAkCzB;eACM,CACT,UADS;GAnCmB;;kBAwChB;eACH,CACT,sBADS;;;CAzCR;;ACAA,IAAMkD,mBAAmB;UACtB,aADsB;;WAGrB;gBACK;;;;;+BAKe,2BAAClD,KAAD,EAAQN,CAAR,EAAc;YAC/ByD,SAASnD,MAAMiB,IAAN,CAAW,QAAX,CAAf;YACMmC,kBAAkB1D,EAAE,iCAAF,CAAxB;wBACgB2D,MAAhB,CAAuBF,MAAvB;cACMG,WAAN,CAAkBF,eAAlB;OATQ;;;;SAcP;KAfE;;eAkBI,CACT,uBADS,CAlBJ;;oBAsBS,KAtBT;;WAwBA,CACL,qBADK,EAEL,QAFK,EAGL,sBAHK;GA3BqB;;UAkCtB;eACK,CACT,kCADS;GAnCiB;;kBAwCd;eACH,CACT,CAAC,4CAAD,EAA+C,cAA/C,CADS;;;CAzCR;;ACAA,IAAMG,mBAAmB;UACtB,iBADsB;;SAGvB;eACM,CACT,aADS,EAET,aAFS;GAJiB;;UAUtB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS,EAET,WAFS,EAGT,SAHS;GAXiB;;WAkBrB;eACI,CACT,cADS,EAET,eAFS,CADJ;;oBAMS,KANT;;gBAQK;oBACI,kBAACvD,KAAD,EAAW;YACnBwD,MAAMxD,MAAME,IAAN,CAAW,KAAX,CAAV;;;;;;;;;;YAUMuD,QAAQ,GAAd;;cAEMD,IAAIE
,OAAJ,CAAY,UAAZ,EAAwBD,KAAxB,CAAN;cACMvD,IAAN,CAAW,KAAX,EAAkBsD,GAAlB;;KAvBG;;WA2BA,CACL,KADK,EAEL,qBAFK,EAGL,2BAHK,EAIL,kBAJK,EAKL,mBALK,EAML,QANK,EAOL,kBAPK,EAQL,SARK;GA7CqB;;kBAyDd,IAzDc;;kBA2Dd,IA3Dc;;OA6DzB,IA7DyB;;iBA+Df,IA/De;;WAiErB;CAjEJ;;ACAP;;AAEA,AAAO,IAAMG,uBAAuB;UAC1B,qBAD0B;SAE3B;eACM,CACT,QADS;GAHqB;;UAQ1B;eACK,CACT,0DADS;GATqB;;WAczB;eACI,CACT,eADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;GA3ByB;;kBAgClB,IAhCkB;;kBAkClB,IAlCkB;;OAoC7B,IApC6B;;iBAsCnB,IAtCmB;;WAwCzB;CAxCJ;;ACFP;;;AAGA,AAAO,IAAMC,qBAAqB;UACxB,mBADwB;SAEzB;eACM,CACT,UADS;GAHmB;;UAQxB;eACK,CACT,eADS;GATmB;;WAcvB;eACI,CACT,iBADS,EAET,iBAFS,CADJ;;;;gBAQK,EARL;;;;;WAcA;GA5BuB;;kBAiChB;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GAlCmB;;kBAuChB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAxCmB;;OA6C3B;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GA9CmB;;iBAmDjB,IAnDiB;;WAqDvB;CArDJ;;ACHP;;;AAGA,AAAO,IAAMC,iBAAiB;UACpB,eADoB;SAErB;eACM,CACT,eADS;GAHe;;UASpB;eACK,CACT,iBADS;GAVe;;WAgBnB;eACI,CACT,iBADS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,kBADK;GA9BmB;;kBAoCZ;eACH,CACT,CAAC,gCAAD,EAAmC,OAAnC,CADS;GArCe;;kBA0CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3Ce;;OAgDvB;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAjDe;;iBAsDb,IAtDa;;WAwDnB;CAxDJ;;ACHP;;;AAGA,AAAO,IAAMC,eAAe;UAClB,aADkB;SAEnB;eACM,CACT,IADS;GAHa;;UASlB;eACK,CACT,qBADS;GAVa;;WAgBjB;eACI,CACT,cADS,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,cADK;GA9BiB;;kBAoCV;eACH,CACT,WADS;GArCa;;kBA0CV;eACH;GA3Ca;;OAgDrB;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GAjDa;;iBAsDX,IAtDW;;WAwDjB;CAxDJ;;ACHP;;;AAGA,AAAO,IAAMC,iBAAiB;UACpB,eADoB;SAErB;eACM,CACT,sBADS;GAHe;;UASpB;eACK,CACT,oBADS;GAVe;;WAgBnB;eACI;;qBAAA,CADJ;;;;gBAQK,EARL;;;;;WAcA,CACL,iBADK;GA9BmB;;kBAoCZ;eACH,CACT,CAAC,qBAAD,EAAwB,UAAxB,CADS;GArCe;;kBA0CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3Ce;;OAgDvB;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAjDe;;iBAuDb,IAvDa;;WAyDnB;CAzDJ;;ACHP;;;AAGA,AAAO,IAAMC,oBAAoB;UACvB,kBADuB;SAExB;eACM,CACT,qBADS;GAHkB;;UASvB;eACK,CACT,gCADS,EACyB,gBADzB;GAVkB;;WAgBtB;eACI,CACT,gBADS,CADJ;;oBAMS,KANT;;;;gBAUK;UACN;KA
XC;;;;;WAiBA,CACL,oBADK,EAEL,uEAFK,EAGL,YAHK;GAjCsB;;kBAwCf;eACH,CACT,gBADS;GAzCkB;;kBA+Cf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAhDkB;;OAqD1B;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GAtDkB;;iBA2DhB,IA3DgB;;WA6DtB;CA7DJ;;ACHP;;;AAGA,AAAO,IAAMC,iBAAiB;UACpB,kBADoB;SAErB;eACM,CACT,gBADS;GAHe;;UASpB;eACK,CACT,eADS,EACQ,KADR;GAVe;;WAgBnB;eACI,CACT,eADS,EAET,gBAFS,CADJ;;;;gBASK,EATL;;;;;WAeA;GA/BmB;;kBAoCZ;eACH,CACT,CAAC,qCAAD,EAAwC,OAAxC,CADS;GArCe;;kBA0CZ;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA3Ce;;OAgDvB;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GAjDe;;iBAsDb,IAtDa;;WAwDnB;CAxDJ;;ACHP;;;AAGA,AAAO,IAAMC,wBAAwB;UAC3B,sBAD2B;SAE5B;eACM,CACT,eADS;GAHsB;;UAS3B;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAVsB;;WAgB1B;eACI;;uBAAA,EAGT,kBAHS,CADJ;;;;gBASK,EATL;;;;;WAeA;GA/B0B;;kBAoCnB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GArCsB;;iBA0CpB,IA1CoB;;WA4C1B;CA5CJ;;ACHP;;;AAGA,AAAO,IAAMC,oBAAoB;UACvB,kBADuB;SAExB;eACM;;KAER,uBAAD,EAA0B,OAA1B,CAFS;GAHkB;;UASvB;eACK,CACT,oCADS;GAVkB;;WAetB;eACI;;yBAAA,EAGT,gBAHS,EAGS,aAHT,EAIT,aAJS,CADJ;;;;gBAUK,EAVL;;;;;WAgBA,CACL,YADK;GA/BsB;;kBAoCf;eACH,CACT,CAAC,+CAAD,EAAkD,UAAlD,CADS;GArCkB;;kBA2Cf;eACH;;KAER,uBAAD,EAA0B,OAA1B,CAFS;GA5CkB;;OAmD1B;eACQ,CACT,CAAC,0BAAD,EAA6B,OAA7B,CADS;GApDkB;;iBAyDhB,IAzDgB;;WA2DtB;CA3DJ;;ACHA,IAAMC,oBAAoB;UACvB,cADuB;;oBAGb,CAChB,aADgB,EAEhB,gBAFgB,EAGhB,YAHgB,EAIhB,aAJgB,EAKhB,cALgB,EAMhB,WANgB,CAHa;;SAYxB;eACM,CACT,aADS;GAbkB;;UAkBvB;eACK,CACT,SADS;GAnBkB;;WAwBtB;eACI,CACT,eADS,EAET,gBAFS,CADJ;;;;gBAQK;0DAC0C,8CAACpE,KAAD,EAAW;YACvDqE,YAAYrE,MAAME,IAAN,CAAW,IAAX,EAAiBoE,KAAjB,CAAuB,UAAvB,EAAmC,CAAnC,CAAlB;cACMpE,IAAN,CAAW,KAAX,qCAAmDmE,SAAnD;;KAXG;;;;;WAkBA;GA1CsB;;kBA8Cf;eACH,CACT,CAAC,wBAAD,EAA2B,UAA3B,CADS;GA/CkB;;kBAoDf;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GArDkB;;OA0D1B;eACQ;;;GA3DkB;;iBAgEhB;eACF;;;GAjEkB;;WAsEtB;eACI;;;;CAvER;;ACAP;;;AAGA,AAAO,IAAME,yBAAyB;UAC5B,uBAD4B;SAE7B;eACM,CACT,kBADS;GAHuB;;UAQ5B;eACK,CACT,uBADS;GATuB;;WAc3B;eACI,CACT,2BADS,CADJ;;;;gBAOK,EAPL;;;;;WAaA;GA3B2B;;kBAgCpB;eACH,CACT,CAAC,8BAAD
,EAAiC,OAAjC,CADS;GAjCuB;;kBAsCpB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GAvCuB;;OA4C/B;eACQ,CACT,CAAC,6BAAD,EAAgC,OAAhC,CADS;GA7CuB;;iBAkDrB;eACF;;;GAnDuB;;WAwD3B;eACI;;;;CAzDR;;ACHP;;;AAGA,AAAO,IAAMC,4BAA4B;UAC/B,0BAD+B;SAEhC;eACM,CACT,aADS;GAH0B;;UAQ/B;eACK,CACT,mBADS;GAT0B;;WAc9B;eACI,CACT,mBADS,CADJ;;;;gBAOK;wDACwC,+CAACxE,KAAD,EAAQN,CAAR,EAAc;YACxD+E,OAAOC,KAAKrF,KAAL,CAAWW,MAAME,IAAN,CAAW,YAAX,CAAX,CAAb;YACQsD,GAFsD,GAE9CiB,KAAKE,OAAL,CAAa,CAAb,CAF8C,CAEtDnB,GAFsD;;YAGxDoB,OAAOlF,EAAE,SAAF,EAAaQ,IAAb,CAAkB,KAAlB,EAAyBsD,GAAzB,CAAb;cACMF,WAAN,CAAkBsB,IAAlB;;KAZG;;;;;WAmBA;GAjC8B;;kBAsCvB;eACH,CACT,CAAC,kCAAD,EAAqC,UAArC,CADS;GAvC0B;;kBA4CvB;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA7C0B;;OAkDlC;eACQ,CACT,CAAC,wBAAD,EAA2B,OAA3B,CADS;GAnD0B;;iBAwDxB;eACF;;;GAzD0B;;WA8D9B;eACI;;;;CA/DR;;ACHA,IAAMC,kBAAkB;UACrB,YADqB;;oBAGX,CAChB,4BADgB,CAHW;;SAOtB;eACM,CACT,IADS;GARgB;;UAarB;eACK,CACT,CAAC,qBAAD,EAAwB,OAAxB,CADS;GAdgB;;WAmBpB;eACI,CACT,kBADS,CADJ;;;;gBAOK;;cAEF,gBAAC7E,KAAD,EAAW;YACX8E,OACJ,kEADF;YAEMC,QAAQC,mBAAmBhF,MAAME,IAAN,CAAW,gBAAX,CAAnB,CAAd;;YAEI4E,KAAK7F,IAAL,CAAU8F,KAAV,CAAJ,EAAsB;6BACGA,MAAME,KAAN,CAAYH,IAAZ,CADH;;cACbhF,CADa;cACVuE,SADU;;;gBAEdnE,IAAN,CAAW,KAAX,qCAAmDmE,SAAnD;cACMtB,UAAU/C,MAAMgD,OAAN,CAAc,QAAd,CAAhB;kBACQC,OAAR,CAAgBjD,MAAMkF,KAAN,EAAhB;gBACM9D,MAAN;;;KAnBC;;;;;WA2BA;GA9CoB;;kBAmDb;eACH,CACT,CAAC,gBAAD,EAAmB,UAAnB,CADS;GApDgB;;kBAyDb;eACH,CACT,CAAC,uBAAD,EAA0B,OAA1B,CADS;GA1DgB;;OA+DxB;eACQ;;;GAhEgB;;iBAqEd;eACF;;;GAtEgB;;WA2EpB;eACI;;;;CA5ER;;;;;;;;;;;;;;;;;;;;;;;;;ACGP,iBAAe,aAAY+D,gBAAZ,EAA8B9C,MAA9B,CAAqC,UAACC,GAAD,EAAM8C,GAAN,EAAc;MAC1DjD,YAAYgD,iBAAiBC,GAAjB,CAAlB;sBAEK9C,GADL,EAEKE,sBAAsBL,SAAtB,CAFL;CAFa,EAMZ,EANY,CAAf;;ACHA;AACA,AAAO,IAAMkD,YAAY,IAAIvH,MAAJ,CAAW,gCAAX,EAA6C,GAA7C,CAAlB;;;;AAIP,AAAO,IAAMwH,aAAa,qBAAnB;;AAEP,AAAO,IAAMC,iBAAiB,CAC5B,wCAD4B,EAE5B,uCAF4B,EAG5B,qCAH4B,EAI5B,oCAJ4B,CAAvB;;;AAQP,AAAO,IAAMC,oBAAoB,CAC/B,OAD+B,EAE/B,QAF+B,EAG/B,UAH+B,EAI/B,MAJ+B,EAK/B,OAL+B,EAM/B,IAN+B,EAO/B,OAP+B,EAQ/B,QAR+B,EAS/B,QAT
+B,CAA1B;;;AAaP,AAAO,IAAMC,eAAe,CAAC,OAAD,EAAU,OAAV,CAArB;AACP,AAAO,IAAMC,wBAAwBD,aAAaE,GAAb,CAAiB;eAAgBC,QAAhB;CAAjB,CAA9B;AACP,AAAO,IAAMC,mBAAmBJ,aAAa1H,IAAb,CAAkB,GAAlB,CAAzB;AACP,AAAO,IAAM+H,kBAAkB,CAAC,KAAD,EAAQ,QAAR,EAAkB,MAAlB,EAA0B,OAA1B,EAAmC,IAAnC,EAAyC,KAAzC,CAAxB;AACP,AAAO,IAAMC,qBAAqB,IAAIjI,MAAJ,QAAgBgI,gBAAgB/H,IAAhB,CAAqB,GAArB,CAAhB,SAA+C,GAA/C,CAA3B;;;AAGP,AAAO,IAAMiI,oBAAoB,CAAC,GAAD,CAA1B;AACP,AAAO,IAAMC,yBAAyBD,kBAAkBL,GAAlB,CAAsB;SAAUO,GAAV;CAAtB,EAA6CnI,IAA7C,CAAkD,GAAlD,CAA/B;;;AAGP,AAAO,IAAMoI,2BAA2B,CAAC,IAAD,EAAO,IAAP,EAAa,OAAb,EAAsB,KAAtB,EAA6B,QAA7B,EAAuC,MAAvC,EAA+CpI,IAA/C,CAAoD,GAApD,CAAjC;;;AAGP,IAAMqI,cAAc,CAAC,IAAD,EAAO,IAAP,EAAa,IAAb,EAAmB,IAAnB,EAAyB,IAAzB,CAApB;AACA,AAAO,IAAMC,kBAAkBD,YAAYrI,IAAZ,CAAiB,GAAjB,CAAxB;;;;;;;;AAQP,AAAO,IAAMuI,gCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C;;AAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,UAvB2C,EAwB3C,OAxB2C,EAyB3C,YAzB2C,EA0B3C,UA1B2C;AA2B3C,2BA3B2C;AA4B3C,OA5B2C,EA6B3C,eA7B2C,EA8B3C,SA9B2C,EA+B3C,QA/B2C,EAgC3C,QAhC2C,EAiC3C,KAjC2C,EAkC3C,OAlC2C,EAmC3C,UAnC2C,EAoC3C,SApC2C,EAqC3C,UArC2C,EAsC3C,SAtC2C,EAuC3C,SAvC2C,EAwC3C,OAxC2C,CAAtC;;;;;;;;;;;;;AAsDP,AAAO,IAAMC,gCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,sBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EAOjC,KAPiC,EAQjC,OARiC,EASjCzI,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO;;AAeP,AAAO;;;;;AAMP,AAAO;;AASP,AAAO;AAMP,AAAO;;;;;;AAMP,AAAO,IAAM0I,uBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,E
AkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,oBAAoB,IAAI5I,MAAJ,CAAW2I,qBAAqB1I,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO;;;;;;AAMP,AAAO,IAAM4I,uBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,oBAAoB,IAAI9I,MAAJ,CAAW6I,qBAAqB5I,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAM8I,iBAAiB,wCAAvB;;;AAGP,AAAO;;;;AAIP,AAAO;AAgBP,AAAO;;;AAGP,AAAO,IAAMC,UAAU,IAAIhJ,MAAJ,CAAW,iBAAX,EAA8B,GAA9B,CAAhB;;;;;;AAMP,AAAO;;;;AAIP,AAAO;;;;AAIP,AAAO;;;AAGP,AAAO;;;AAGP,AAAO;;;;AAIP,AAAO,IAAMiJ,mBAAmB,CAC9B,SAD8B,EAE9B,OAF8B,EAG9B,YAH8B,EAI9B,MAJ8B,EAK9B,IAL8B,EAM9B,QAN8B,EAO9B,QAP8B,EAQ9B,SAR8B,EAS9B,KAT8B,EAU9B,UAV8B,EAW9B,IAX8B,EAY9B,KAZ8B,EAa9B,IAb8B,EAc9B,IAd8B,EAe9B,OAf8B,EAgB9B,UAhB8B,EAiB9B,YAjB8B,EAkB9B,QAlB8B,EAmB9B,QAnB8B,EAoB9B,MApB8B,EAqB9B,IArB8B,EAsB9B,IAtB8B,EAuB9B,IAvB8B,EAwB9B,IAxB8B,EAyB9B,IAzB8B,EA0B9B,IA1B8B,EA2B9B,QA3B8B,EA4B9B,QA5B8B,EA6B9B,IA7B8B,EA8B9B,IA9B8B,EA+B9B,KA/B8B,EAgC9B,QAhC8B,EAiC9B,IAjC8B,EAkC9B,QAlC8B,EAmC9B,GAnC8B,EAoC9B,KApC8B,EAqC9B,UArC8B,EAsC9B,SAtC8B,EAuC9B,OAvC8B,EAwC9B,OAxC8B,EAyC9B,UAzC8B,EA0C9B,OA1C8B,EA2C9B,IA3C8B,EA4C9B,OA5C8B,EA6C9B,IA7C8B,EA8C9B,IA9C8B,EA+C9B,OA/C8B,CAAzB;AAiDP,AAAO,IAAMC,sBAAsB,IAAIlJ,MAAJ,QAAgBiJ,iBAAiBhJ,IAAjB,CAAsB,GAAtB,CAAhB,SAAgD,GAAhD,CAA5B;;;;;;AAMP,IAAMkJ,sBAAsBX,8BAA8BvI,IA
A9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMmJ,uBAAuB,IAAIpJ,MAAJ,CAAWmJ,mBAAX,EAAgC,GAAhC,CAA7B;;AAEP,IAAME,sBAAsBZ,8BAA8BxI,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,IAAMqJ,uBAAuB,IAAItJ,MAAJ,CAAWqJ,mBAAX,EAAgC,GAAhC,CAA7B,CAEP,AAAO,AAEP,AAAO,AACP,AAAO,AACP,AAAO,AAEP,AAAO;;AClYQ,SAASE,uBAAT,CAAiC3H,CAAjC,EAAoC;;;;;;;;;;IAU/C,GAAF,EAAO4H,GAAP,CAAW,GAAX,EAAgBzH,IAAhB,CAAqB,UAACgB,KAAD,EAAQd,IAAR,EAAiB;QAC9BC,QAAQN,EAAEK,IAAF,CAAd;QACMwH,UAAUvH,MAAME,IAAN,CAAW,OAAX,CAAhB;QACMsH,KAAKxH,MAAME,IAAN,CAAW,IAAX,CAAX;QACI,CAACsH,EAAD,IAAO,CAACD,OAAZ,EAAqB;;QAEfE,cAAgBF,WAAW,EAA3B,WAAiCC,MAAM,EAAvC,CAAN;QACIJ,qBAAqBnI,IAArB,CAA0BwI,UAA1B,CAAJ,EAA2C;;KAA3C,MAEO,IAAIP,qBAAqBjI,IAArB,CAA0BwI,UAA1B,CAAJ,EAA2C;YAC1CrG,MAAN;;GAVJ;;SAcO1B,CAAP;;;AC3BF;;;;;;;;;AASA,AAAe,SAASgI,UAAT,CAAiBhI,CAAjB,EAAoB;MAC7BiI,aAAa,KAAjB;IACE,IAAF,EAAQ9H,IAAR,CAAa,UAACgB,KAAD,EAAQ+G,OAAR,EAAoB;QACzBC,cAAcnI,EAAEkI,OAAF,EAAWE,IAAX,GAAkB7J,GAAlB,CAAsB,CAAtB,CAApB;;QAEI4J,eAAeA,YAAYhF,OAAZ,KAAwB,IAA3C,EAAiD;mBAClC,IAAb;QACE+E,OAAF,EAAWxG,MAAX;KAFF,MAGO,IAAIuG,UAAJ,EAAgB;mBACR,KAAb;;mBAEaC,OAAb,EAAsBlI,CAAtB,EAAyB,IAAzB;;GATJ;;SAaOA,CAAP;;;ACxBF;;;;;;;;;;;AAWA,AAAe,SAASqI,YAAT,CAAsBhI,IAAtB,EAA4BL,CAA5B,EAA2C;MAAZsI,EAAY,uEAAP,KAAO;;MAClDhI,QAAQN,EAAEK,IAAF,CAAd;;MAEIiI,EAAJ,EAAQ;QACFC,UAAUlI,KAAKmI,WAAnB;QACMC,IAAIzI,EAAE,SAAF,CAAV;;;;WAIOuI,WAAW,EAAEA,QAAQpF,OAAR,IAAmBmE,oBAAoB/H,IAApB,CAAyBgJ,QAAQpF,OAAjC,CAArB,CAAlB,EAAmF;UAC3EqF,cAAcD,QAAQC,WAA5B;QACED,OAAF,EAAWG,QAAX,CAAoBD,CAApB;gBACUD,WAAV;;;UAGI5E,WAAN,CAAkB6E,CAAlB;UACM/G,MAAN;WACO1B,CAAP;;;SAGKA,CAAP;;;AC7BF,SAAS2I,WAAT,CAAqB3I,CAArB,EAAwB;IACpB,KAAF,EAASG,IAAT,CAAc,UAACgB,KAAD,EAAQyH,GAAR,EAAgB;QACtBC,OAAO7I,EAAE4I,GAAF,CAAb;QACME,cAAcD,KAAKvG,QAAL,CAAcwE,mBAAd,EAAmCvE,MAAnC,KAA8C,CAAlE;;QAEIuG,WAAJ,EAAiB;oBACDD,IAAd,EAAoB7I,CAApB,EAAuB,GAAvB;;GALJ;;SASOA,CAAP;;;AAGF,SAAS+I,YAAT,CAAsB/I,CAAtB,EAAyB;IACrB,MAAF,EAAUG,IAAV,CAAe,UAACgB,KAAD,EAAQ6H,IAAR,EAAiB;QACxBC,QAAQjJ,EAAEgJ,IAAF,CAAd;QACMF,cAAcG,MAAM3F,OAAN,CAAc,QAAd,EAAwBf,MAAxB,KAAmC,CAAvD;QACIuG,WAAJ,EAA
iB;oBACDG,KAAd,EAAqBjJ,CAArB,EAAwB,GAAxB;;GAJJ;;SAQOA,CAAP;;;;;;;;;;;;;;;AAeF,AAAe,SAASkJ,sBAAT,CAA6BlJ,CAA7B,EAAgC;MACzCgI,WAAQhI,CAAR,CAAJ;MACI2I,YAAY3I,CAAZ,CAAJ;MACI+I,aAAa/I,CAAb,CAAJ;;SAEOA,CAAP;;;AC9Ca,SAASmJ,aAAT,CAAuB7I,KAAvB,EAA8BN,CAA9B,EAA4C;MAAXwG,GAAW,uEAAL,GAAK;;MACnDnG,OAAOC,MAAM/B,GAAN,CAAU,CAAV,CAAb;MACI,CAAC8B,IAAL,EAAW;WACFL,CAAP;;;mBAEkBM,MAAM/B,GAAN,CAAU,CAAV,CALqC;MAKjDyC,OALiD,cAKjDA,OALiD;;MAMnDoI,eAAe,iBAAgBpI,OAAhB,EACQiF,GADR,CACY;WAAUP,GAAV,SAAiB1E,QAAQ0E,GAAR,CAAjB;GADZ,EAEQrH,IAFR,CAEa,GAFb,CAArB;;QAIMuF,WAAN,OAAsB4C,GAAtB,SAA6B4C,YAA7B,SAA6C9I,MAAMkB,QAAN,EAA7C,UAAkEgF,GAAlE;SACOxG,CAAP;;;ACTF,SAASqJ,cAAT,CAAwBnE,IAAxB,EAA8BlF,CAA9B,EAAiC;MACzBsJ,SAASC,SAASrE,KAAK1E,IAAL,CAAU,QAAV,CAAT,EAA8B,EAA9B,CAAf;MACMuD,QAAQwF,SAASrE,KAAK1E,IAAL,CAAU,OAAV,CAAT,EAA6B,EAA7B,KAAoC,EAAlD;;;;;MAKI,CAAC8I,UAAU,EAAX,IAAiB,EAAjB,IAAuBvF,QAAQ,EAAnC,EAAuC;SAChCrC,MAAL;GADF,MAEO,IAAI4H,MAAJ,EAAY;;;;SAIZ7I,UAAL,CAAgB,QAAhB;;;SAGKT,CAAP;;;;;AAKF,SAASwJ,aAAT,CAAuBtE,IAAvB,EAA6BlF,CAA7B,EAAgC;MAC1B2F,UAAUpG,IAAV,CAAe2F,KAAK1E,IAAL,CAAU,KAAV,CAAf,CAAJ,EAAsC;SAC/BkB,MAAL;;;SAGK1B,CAAP;;;AAGF,AAAe,SAASyJ,WAAT,CAAqBC,QAArB,EAA+B1J,CAA/B,EAAkC;WACtCuB,IAAT,CAAc,KAAd,EAAqBpB,IAArB,CAA0B,UAACgB,KAAD,EAAQJ,GAAR,EAAgB;QAClCmE,OAAOlF,EAAEe,GAAF,CAAb;;mBAEemE,IAAf,EAAqBlF,CAArB;kBACckF,IAAd,EAAoBlF,CAApB;GAJF;;SAOOA,CAAP;;;AChCa,SAAS2J,UAAT,CAAoBC,OAApB,EAA6B5J,CAA7B,EAAgCR,GAAhC,EAAgD;MAAXqK,IAAW,uEAAJ,EAAI;;MACzDA,KAAKtH,MAAL,KAAgB,CAApB,EAAuB;WACdsD,cAAP;;;MAGErG,GAAJ,EAAS;qBACwBE,IAAIC,KAAJ,CAAUH,GAAV,CADxB;QACCsK,QADD,cACCA,QADD;QACWhM,QADX,cACWA,QADX;;wCAEI+L,IAAX,sBAAiCC,QAAjC,UAA8ChM,QAA9C;;;IAGA+L,KAAKxL,IAAL,CAAU,GAAV,CAAF,EAAkBuL,OAAlB,EAA2BG,QAA3B,CAAoCnE,UAApC;;SAEO5F,CAAP;;;ACda,SAASgK,aAAT,CAAuBJ,OAAvB,EAAgC5J,CAAhC,EAA8C;MAAX6J,IAAW,uEAAJ,EAAI;;MACvDA,KAAKtH,MAAL,KAAgB,CAApB,EAAuB;WACduD,iBAAP;;;;;IAKA+D,KAAKxL,IAAL,CAAU,GAAV,CAAF,EAAkBuL,OAAlB,EAA2BhC,GAA3B,OAAmChC,UAAnC,EAAiDlE,MAAjD;;;UAGMkE,UAAN,EAAoBgE,OAApB,EAA6BK,WAA7B,CAAyCrE,UAAzC;;SAEO5F,CAAP;;;
ACfF;;;;AAGA,AAAe,SAASkK,aAAT,CAAoBN,OAApB,EAA6B5J,CAA7B,EAAgC;MACvCmK,SAASnK,EAAE,IAAF,EAAQ4J,OAAR,CAAf;;MAEIO,OAAO5H,MAAP,GAAgB,CAApB,EAAuB;WACdpC,IAAP,CAAY,UAACgB,KAAD,EAAQd,IAAR;aAAiBL,EAAEK,IAAF,EAAQqB,MAAR,EAAjB;KAAZ;GADF,MAEO;WACEvB,IAAP,CAAY,UAACgB,KAAD,EAAQd,IAAR,EAAiB;oBACbL,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0B,IAA1B;KADF;;;SAKKA,CAAP;;;ACdF,SAASoK,qBAAT,CAA+BV,QAA/B,EAAyC;WAC9BnI,IAAT,CAAc,GAAd,EAAmBpB,IAAnB,CAAwB,UAACgB,KAAD,EAAQd,IAAR,EAAiB;SAClCW,OAAL,GAAe,iBAAgBX,KAAKW,OAArB,EAA8B2B,MAA9B,CAAqC,UAACC,GAAD,EAAMpC,IAAN,EAAe;UAC7D6F,mBAAmB9G,IAAnB,CAAwBiB,IAAxB,CAAJ,EAAmC;4BACrBoC,GAAZ,sBAAkBpC,IAAlB,EAAyBH,KAAKW,OAAL,CAAaR,IAAb,CAAzB;;;aAGKoC,GAAP;KALa,EAMZ,EANY,CAAf;GADF;;SAUO8G,QAAP;;;;;;;;;;AAUF,AAAe,SAASW,eAAT,CAAyBX,QAAzB,EAAmC;;;;SAIzCU,sBACLV,SAASY,MAAT,GAAkB/H,MAAlB,GACEmH,SAASY,MAAT,EADF,GACsBZ,QAFjB,CAAP;;;AC3Ba,SAASa,WAAT,CAAqBb,QAArB,EAA+B1J,CAA/B,EAAkC;WACtCuB,IAAT,CAAc,GAAd,EAAmBpB,IAAnB,CAAwB,UAACgB,KAAD,EAAQsH,CAAR,EAAc;QAC9B+B,KAAKxK,EAAEyI,CAAF,CAAX;QACI+B,GAAGjJ,IAAH,CAAQ,aAAR,EAAuBgB,MAAvB,KAAkC,CAAlC,IAAuCiI,GAAGC,IAAH,GAAUC,IAAV,OAAqB,EAAhE,EAAoEF,GAAG9I,MAAH;GAFtE;;SAKO1B,CAAP;;;ACNF;;;;;;AAMA,AAAO,IAAM4G,kCAAgC,CAC3C,UAD2C,EAE3C,OAF2C,EAG3C,QAH2C,EAI3C,SAJ2C,EAK3C,SAL2C,EAM3C,KAN2C,EAO3C,gBAP2C,EAQ3C,OAR2C,EAS3C,SAT2C,EAU3C,cAV2C,EAW3C,QAX2C,EAY3C,iBAZ2C,EAa3C,OAb2C,EAc3C,MAd2C,EAe3C,MAf2C,EAgB3C,QAhB2C,EAiB3C,QAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C;AAoB3C,MApB2C,EAqB3C,MArB2C,EAsB3C,KAtB2C,EAuB3C,OAvB2C,EAwB3C,YAxB2C,EAyB3C,UAzB2C;AA0B3C,2BA1B2C;AA2B3C,OA3B2C,EA4B3C,eA5B2C,EA6B3C,SA7B2C,EA8B3C,QA9B2C,EA+B3C,QA/B2C,EAgC3C,KAhC2C,EAiC3C,OAjC2C,EAkC3C,UAlC2C,EAmC3C,SAnC2C,EAoC3C,UApC2C,EAqC3C,SArC2C,EAsC3C,OAtC2C,CAAtC;;;;;;;;;;;;;AAoDP,AAAO,IAAMC,kCAAgC,CAC3C,KAD2C,EAE3C,SAF2C,EAG3C,MAH2C,EAI3C,WAJ2C,EAK3C,QAL2C,EAM3C,SAN2C,EAO3C,qBAP2C,EAQ3C,QAR2C;AAS3C,OAT2C,EAU3C,QAV2C,EAW3C,OAX2C,EAY3C,MAZ2C,EAa3C,MAb2C,EAc3C,OAd2C,EAe3C,QAf2C,CAAtC;;;;;AAqBP,AAAO,IAAMC,wBAAsB,CACjC,GADiC,EAEjC,YAFiC,EAGjC,IAHiC,EAIjC,KAJiC,EAKjC,KALiC,EAMjC,GANiC,EA
OjC,KAPiC,EAQjC,OARiC,EASjCzI,IATiC,CAS5B,GAT4B,CAA5B;;;;AAaP,AAAO,IAAMsM,2BAAyB,CACpC,IADoC,EAEpC,GAFoC,EAGpC,GAHoC,EAIpC,OAJoC,EAKpC,IALoC,EAMpC,MANoC,EAOpC,MAPoC,EAQpC,UARoC,EASpC,OAToC,EAUpC,KAVoC,EAWpC,MAXoC,EAYpC,MAZoC,CAA/B;;AAeP,AAAO,IAAMC,8BACX,IAAIxM,MAAJ,QAAgBuM,yBAAuBtM,IAAvB,CAA4B,GAA5B,CAAhB,SAAsD,GAAtD,CADK;;;;;AAMP,AAAO,IAAMwM,4BAA0B,CACrC,CAAC,SAAD,EAAY,gBAAZ,CADqC,EAErC,CAAC,OAAD,EAAU,gBAAV,CAFqC,EAGrC,CAAC,QAAD,EAAW,gBAAX,CAHqC,EAIrC,CAAC,OAAD,EAAU,WAAV,CAJqC,EAKrC,CAAC,OAAD,EAAU,YAAV,CALqC,EAMrC,CAAC,OAAD,EAAU,YAAV,CANqC,CAAhC;;AASP,AAAO,IAAMC,gBAAc,CACzB,QADyB,EAEzB,OAFyB,EAGzB,OAHyB,EAIzB,SAJyB,CAApB;AAMP,AAAO,IAAMC,mBAAiB,IAAI3M,MAAJ,CAAW0M,cAAYzM,IAAZ,CAAiB,GAAjB,CAAX,EAAkC,GAAlC,CAAvB;;;;;;AAMP,AAAO,IAAM0I,yBAAuB,CAClC,SADkC,EAElC,gBAFkC,EAGlC,iBAHkC,EAIlC,MAJkC,EAKlC,MALkC,EAMlC,SANkC,EAOlC,qBAPkC,EAQlC,OARkC,EASlC,QATkC,EAUlC,MAVkC,EAWlC,QAXkC,EAYlC,MAZkC,EAalC,YAbkC,EAclC,WAdkC,EAelC,MAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,UAlBkC;AAmBlC,SAnBkC,CAA7B;;;AAuBP,AAAO,IAAMC,sBAAoB,IAAI5I,MAAJ,CAAW2I,uBAAqB1I,IAArB,CAA0B,GAA1B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,IAAM2M,sBAAoB,IAAI5M,MAAJ,CAAW,qBAAX,EAAkC,GAAlC,CAA1B;;;;;;AAMP,AAAO,IAAM6I,yBAAuB,CAClC,OADkC,EAElC,QAFkC,EAGlC,QAHkC,EAIlC,KAJkC,EAKlC,UALkC,EAMlC,QANkC,EAOlC,QAPkC,EAQlC,OARkC,EASlC,MATkC,EAUlC,OAVkC,EAWlC,SAXkC,EAYlC,YAZkC,EAalC,SAbkC,EAclC,MAdkC,EAelC,QAfkC,EAgBlC,OAhBkC,EAiBlC,MAjBkC,EAkBlC,MAlBkC,EAmBlC,SAnBkC,EAoBlC,UApBkC;AAqBlC,MArBkC,EAsBlC,QAtBkC,EAuBlC,UAvBkC,EAwBlC,MAxBkC,EAyBlC,MAzBkC,EA0BlC,MA1BkC,EA2BlC,UA3BkC;AA4BlC,mBA5BkC,EA6BlC,MA7BkC,EA8BlC,WA9BkC,EA+BlC,MA/BkC,EAgClC,UAhCkC,EAiClC,OAjCkC,EAkClC,MAlCkC,EAmClC,OAnCkC,EAoClC,UApCkC;AAqClC,OArCkC,EAsClC,KAtCkC;AAuClC,SAvCkC,EAwClC,SAxCkC,EAyClC,cAzCkC;AA0ClC,QA1CkC,EA2ClC,WA3CkC,EA4ClC,OA5CkC,EA6ClC,UA7CkC,EA8ClC,UA9CkC,EA+ClC,MA/CkC,EAgDlC,SAhDkC,EAiDlC,SAjDkC,EAkDlC,OAlDkC,EAmDlC,KAnDkC,EAoDlC,SApDkC,EAqDlC,MArDkC,EAsDlC,OAtDkC,EAuDlC,QAvDkC,CAA7B;;AA0DP,AAAO,IAAMC,sBAAoB,IAAI9I,MAAJ,CAAW6I,uBAAqB5I,IAArB,CAA0B,GAA1
B,CAAX,EAA2C,GAA3C,CAA1B;;;AAGP,AAAO,AAAM4M;;;AAGb,AAAO,AAAMC;;;AAGb,AAAO,AAAMC;;;;AAIb,AAAO,AAAM9D;AAiDb,AAAO,AAAMC,AAAsCD;;;;;;AAMnD,IAAME,wBAAsBX,gCAA8BvI,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMmJ,AAAkCD,AAAX;;AAEpC,IAAME,wBAAsBZ,gCAA8BxI,IAA9B,CAAmC,GAAnC,CAA5B;AACA,AAAO,AAAMqJ,AAAkCD,AAAX;;AAEpC,AAAO,AAAM2D,AAA8B3D,AAAhB,AAAyCF,AAAzC;;AAE3B,AAAO,IAAM8D,yBAAuB,IAAIjN,MAAJ,CAAW,mBAAX,EAAgC,GAAhC,CAA7B;AACP,AAAO,IAAMkN,uBAAqB,IAAIlN,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAA3B;AACP,AAAO,IAAMmN,aAAW,IAAInN,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAjB,CAEP,AAAO,AAAMoN;;ACzSb;AACA,AAAe,SAASC,SAAT,CAAmBpL,IAAnB,EAAyB;MAChCwH,UAAUxH,KAAKG,IAAL,CAAU,OAAV,CAAhB;MACMsH,KAAKzH,KAAKG,IAAL,CAAU,IAAV,CAAX;MACIkL,QAAQ,CAAZ;;MAEI5D,EAAJ,EAAQ;;QAEFd,oBAAkBzH,IAAlB,CAAuBuI,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;QAEEZ,oBAAkB3H,IAAlB,CAAuBuI,EAAvB,CAAJ,EAAgC;eACrB,EAAT;;;;MAIAD,OAAJ,EAAa;QACP6D,UAAU,CAAd,EAAiB;;;UAGX1E,oBAAkBzH,IAAlB,CAAuBsI,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;UAEEX,oBAAkB3H,IAAlB,CAAuBsI,OAAvB,CAAJ,EAAqC;iBAC1B,EAAT;;;;;;;QAOAkD,iBAAexL,IAAf,CAAoBsI,OAApB,CAAJ,EAAkC;eACvB,EAAT;;;;;;;QAOEmD,oBAAkBzL,IAAlB,CAAuBsI,OAAvB,CAAJ,EAAqC;eAC1B,EAAT;;;;SAIG6D,KAAP;;;ACnDF;;;AAGA,AAAe,SAASC,QAAT,CAAkBrL,KAAlB,EAAyB;SAC/BsL,WAAWtL,MAAME,IAAN,CAAW,OAAX,CAAX,KAAmC,IAA1C;;;ACJF;AACA,AAAe,SAASqL,WAAT,CAAqBpB,IAArB,EAA2B;SACjC,CAACA,KAAKlF,KAAL,CAAW,IAAX,KAAoB,EAArB,EAAyBhD,MAAhC;;;ACFF,IAAMuJ,QAAQ,IAAI1N,MAAJ,CAAW,WAAX,EAAwB,GAAxB,CAAd;;AAEA,AAAe,SAAS2N,WAAT,CAAqBC,UAArB,EAAgD;MAAf7I,OAAe,uEAAL,GAAK;;MACvD8I,SAASD,aAAa,EAA5B;;MAEIC,SAAS,CAAb,EAAgB;QACVC,oBAAJ;;;;;;;QAOIJ,MAAMvM,IAAN,CAAW4D,OAAX,CAAJ,EAAyB;oBACT8I,SAAS,CAAvB;KADF,MAEO;oBACSA,SAAS,IAAvB;;;WAGKE,KAAKC,GAAL,CAASD,KAAKE,GAAL,CAASH,WAAT,EAAsB,CAAtB,CAAT,EAAmC,CAAnC,CAAP;;;SAGK,CAAP;;;ACjBF;;AAEA,AAAe,SAASI,iBAAT,CAAwBjM,IAAxB,EAA8B;MACvCqL,QAAQ,CAAZ;MACMjB,OAAOpK,KAAKoK,IAAL,GAAYC,IAAZ,EAAb;MACMsB,aAAavB,KAAKlI,MAAxB;;;MAGIyJ,aAAa,EAAjB,EAAqB;WACZ,CAAP;;;;WAIOH,YAAYpB,IAAZ,CAAT;;;;WAISsB,YAAYC,UAAZ,CAAT;;;;;;MAMIvB,KAAK8B,KAAL,CAAW,CAAC,CAAZ,MAAmB,GAAvB,EAA
4B;aACjB,CAAT;;;SAGKb,KAAP;;;AChCa,SAASc,QAAT,CAAkBlM,KAAlB,EAAyBN,CAAzB,EAA4B0L,KAA5B,EAAmC;QAC1ClL,IAAN,CAAW,OAAX,EAAoBkL,KAApB;SACOpL,KAAP;;;ACGa,SAASmM,WAAT,CAAkBnM,KAAlB,EAAyBN,CAAzB,EAA4B0M,MAA5B,EAAoC;MAC7C;QACIhB,QAAQiB,kBAAerM,KAAf,EAAsBN,CAAtB,IAA2B0M,MAAzC;aACSpM,KAAT,EAAgBN,CAAhB,EAAmB0L,KAAnB;GAFF,CAGE,OAAOkB,CAAP,EAAU;;;;SAILtM,KAAP;;;ACXF;AACA,AAAe,SAASuM,cAAT,CAAqBxM,IAArB,EAA2BL,CAA3B,EAA8B0L,KAA9B,EAAqC;MAC5CpB,SAASjK,KAAKiK,MAAL,EAAf;MACIA,MAAJ,EAAY;gBACDA,MAAT,EAAiBtK,CAAjB,EAAoB0L,QAAQ,IAA5B;;;SAGKrL,IAAP;;;ACFF;;;AAGA,AAAe,SAASsM,iBAAT,CAAwBrM,KAAxB,EAA+BN,CAA/B,EAAsD;MAApB8M,WAAoB,uEAAN,IAAM;;MAC/DpB,QAAQC,SAASrL,KAAT,CAAZ;;MAEIoL,KAAJ,EAAW;WACFA,KAAP;;;UAGMqB,aAAUzM,KAAV,CAAR;;MAEIwM,WAAJ,EAAiB;aACNrB,UAAUnL,KAAV,CAAT;;;iBAGUA,KAAZ,EAAmBN,CAAnB,EAAsB0L,KAAtB;;SAEOA,KAAP;;;AClBF;;AAEA,AAAe,SAASqB,YAAT,CAAmBzM,KAAnB,EAA0B;mBACnBA,MAAM/B,GAAN,CAAU,CAAV,CADmB;MAC/B4E,OAD+B,cAC/BA,OAD+B;;;;;;;MAMnCkI,uBAAqB9L,IAArB,CAA0B4D,OAA1B,CAAJ,EAAwC;WAC/BmJ,kBAAehM,KAAf,CAAP;GADF,MAEO,IAAI6C,YAAY,KAAhB,EAAuB;WACrB,CAAP;GADK,MAEA,IAAImI,qBAAmB/L,IAAnB,CAAwB4D,OAAxB,CAAJ,EAAsC;WACpC,CAAP;GADK,MAEA,IAAIoI,WAAShM,IAAT,CAAc4D,OAAd,CAAJ,EAA4B;WAC1B,CAAC,CAAR;GADK,MAEA,IAAIA,YAAY,IAAhB,EAAsB;WACpB,CAAC,CAAR;;;SAGK,CAAP;;;ACjBF,SAAS4F,cAAT,CAAsBzI,KAAtB,EAA6BN,CAA7B,EAAgC;MAC1BM,MAAM/B,GAAN,CAAU,CAAV,CAAJ,EAAkB;qBACI+B,MAAM/B,GAAN,CAAU,CAAV,CADJ;QACR4E,OADQ,cACRA,OADQ;;QAGZA,YAAY,MAAhB,EAAwB;;oBAER7C,KAAd,EAAqBN,CAArB,EAAwB,KAAxB;;;;;AAKN,SAASgN,UAAT,CAAoB1M,KAApB,EAA2BN,CAA3B,EAA8B0L,KAA9B,EAAqC;MAC/BpL,KAAJ,EAAW;mBACIA,KAAb,EAAoBN,CAApB;gBACSM,KAAT,EAAgBN,CAAhB,EAAmB0L,KAAnB;;;;AAIJ,SAASuB,OAAT,CAAiBjN,CAAjB,EAAoB8M,WAApB,EAAiC;IAC7B,QAAF,EAAYlF,GAAZ,CAAgB,SAAhB,EAA2BzH,IAA3B,CAAgC,UAACgB,KAAD,EAAQd,IAAR,EAAiB;;;QAG3CC,QAAQN,EAAEK,IAAF,CAAZ;YACQmM,SAASlM,KAAT,EAAgBN,CAAhB,EAAmB2M,kBAAerM,KAAf,EAAsBN,CAAtB,EAAyB8M,WAAzB,CAAnB,CAAR;;QAEMzJ,UAAU/C,MAAMgK,MAAN,EAAhB;QACM4C,WAAWH,aAAUzM,KAAV,CAAjB;;eAEW+C,OAAX,EAAoBrD,CAApB,EAAuBkN,QAAvB,EAAiCJ,WAAjC;QACIzJ,OAAJ,EAAa;
;;iBAGAA,QAAQiH,MAAR,EAAX,EAA6BtK,CAA7B,EAAgCkN,WAAW,CAA3C,EAA8CJ,WAA9C;;GAbJ;;SAiBO9M,CAAP;;;;;AAKF,AAAe,SAASmN,eAAT,CAAsBnN,CAAtB,EAA6C;MAApB8M,WAAoB,uEAAN,IAAM;;;;4BAGlC7L,OAAxB,CAAgC,gBAAqC;;QAAnCmM,cAAmC;QAAnBC,aAAmB;;MAC9DD,cAAL,SAAuBC,aAAvB,EAAwClN,IAAxC,CAA6C,UAACgB,KAAD,EAAQd,IAAR,EAAiB;kBACnDL,EAAEK,IAAF,EAAQiK,MAAR,CAAe8C,cAAf,CAAT,EAAyCpN,CAAzC,EAA4C,EAA5C;KADF;GADF;;;;;;;UAWQA,CAAR,EAAW8M,WAAX;UACQ9M,CAAR,EAAW8M,WAAX;;SAEO9M,CAAP;;;ACpEF,IAAMsN,eAAe,SAArB;;AAEA,AAAe,SAASC,eAAT,CAAyB9C,IAAzB,EAA+B;SACrCA,KAAKzG,OAAL,CAAasJ,YAAb,EAA2B,GAA3B,EAAgC5C,IAAhC,EAAP;;;ACHF;;;;;AAKA,AAAe,SAAS8C,cAAT,CAAwBhO,GAAxB,EAA6BiO,SAA7B,EAAwC;MAC/CC,UAAUD,UAAUlM,IAAV,CAAe;WAAMoM,GAAGpO,IAAH,CAAQC,GAAR,CAAN;GAAf,CAAhB;MACIkO,OAAJ,EAAa;WACJA,QAAQE,IAAR,CAAapO,GAAb,EAAkB,CAAlB,CAAP;;;SAGK,IAAP;;;ACXF;;;;;;;;;;;;;;;;AAgBA,AAAO,IAAMqO,kBAAkB,IAAIzP,MAAJ,CAAW,0EAAX,EAAuF,GAAvF,CAAxB;;AAEP,AAAO,IAAM0P,eAAe,QAArB;;AAEP,AAAO,IAAMC,cAAc,WAApB;AACP,AAAO,IAAMC,cAAc,WAApB;;ACnBQ,SAASC,cAAT,CAAwBzO,GAAxB,EAA6B;MACpC0O,UAAU1O,IAAI+F,KAAJ,CAAUsI,eAAV,CAAhB;MACI,CAACK,OAAL,EAAc,OAAO,IAAP;;MAERC,UAAU5E,SAAS2E,QAAQ,CAAR,CAAT,EAAqB,EAArB,CAAhB;;;;SAIOC,UAAU,GAAV,GAAgBA,OAAhB,GAA0B,IAAjC;;;ACVa,SAASC,YAAT,CAAsB5O,GAAtB,EAA2B;SACjCA,IAAIoF,KAAJ,CAAU,GAAV,EAAe,CAAf,EAAkBZ,OAAlB,CAA0B,KAA1B,EAAiC,EAAjC,CAAP;;;ACOF,SAASqK,aAAT,CAAuBC,OAAvB,EAAgCnN,KAAhC,EAAuCoN,sBAAvC,EAA+D;MACzDC,cAAc,IAAlB;;;;MAIIrN,QAAQ,CAAR,IAAa6M,YAAYzO,IAAZ,CAAiB+O,OAAjB,CAAb,IAA0CA,QAAQ/L,MAAR,GAAiB,CAA/D,EAAkE;kBAClD,IAAd;;;;;MAKEpB,UAAU,CAAV,IAAemN,QAAQG,WAAR,OAA0B,OAA7C,EAAsD;kBACtC,KAAd;;;;;MAKEtN,QAAQ,CAAR,IAAamN,QAAQ/L,MAAR,GAAiB,CAA9B,IAAmC,CAACgM,sBAAxC,EAAgE;kBAChD,KAAd;;;SAGKC,WAAP;;;;;;AAMF,AAAe,SAASE,cAAT,CAAwBlP,GAAxB,EAA6BmP,MAA7B,EAAqC;MAC5ClP,YAAYkP,UAAUjP,IAAIC,KAAJ,CAAUH,GAAV,CAA5B;MACQsK,QAF0C,GAEjBrK,SAFiB,CAE1CqK,QAF0C;MAEhC8E,IAFgC,GAEjBnP,SAFiB,CAEhCmP,IAFgC;MAE1BC,IAF0B,GAEjBpP,SAFiB,CAE1BoP,IAF0B;;;MAI9CN,yBAAyB,KAA7B;MACMO,kBAAkBD,KAAKjK,KAAL,CAAW,GAAX,EACvBmK,OADuB,GAEvBpM,MAFuB,CAEhB,UAACC,GAAD,E
AAMoM,UAAN,EAAkB7N,KAAlB,EAA4B;QAC9BmN,UAAUU,UAAd;;;QAGIV,QAAQpM,QAAR,CAAiB,GAAjB,CAAJ,EAA2B;2BACUoM,QAAQ1J,KAAR,CAAc,GAAd,CADV;;UAClBqK,eADkB;UACDC,OADC;;UAErBnB,YAAYxO,IAAZ,CAAiB2P,OAAjB,CAAJ,EAA+B;kBACnBD,eAAV;;;;;;QAMApB,gBAAgBtO,IAAhB,CAAqB+O,OAArB,KAAiCnN,QAAQ,CAA7C,EAAgD;gBACpCmN,QAAQtK,OAAR,CAAgB6J,eAAhB,EAAiC,EAAjC,CAAV;;;;;;;QAOE1M,UAAU,CAAd,EAAiB;+BACU2M,aAAavO,IAAb,CAAkB+O,OAAlB,CAAzB;;;;QAIED,cAAcC,OAAd,EAAuBnN,KAAvB,EAA8BoN,sBAA9B,CAAJ,EAA2D;UACrDY,IAAJ,CAASb,OAAT;;;WAGK1L,GAAP;GAhCsB,EAiCrB,EAjCqB,CAAxB;;SAmCUkH,QAAV,UAAuB8E,IAAvB,GAA8BE,gBAAgBC,OAAhB,GAA0B1Q,IAA1B,CAA+B,GAA/B,CAA9B;;;AC3EF;;AAEA,IAAM+Q,kBAAkB,IAAIhR,MAAJ,CAAW,QAAX,CAAxB;AACA,AAAe,SAASiR,cAAT,CAAwB5E,IAAxB,EAA8B;SACpC2E,gBAAgB7P,IAAhB,CAAqBkL,IAArB,CAAP;;;ACJa,SAAS6E,cAAT,CAAwBrN,OAAxB,EAA6C;kBAAZsN,KAAY,uEAAJ,EAAI;;qBACnDtN,QAAQyI,IAAR,GACQ9F,KADR,CACc,KADd,EAEQ2H,KAFR,CAEc,CAFd,EAEiBgD,KAFjB,EAGQlR,IAHR,CAGa,GAHb,CAAP;;;ACQF;;;;;AAKA,AAAe,SAASmR,aAAT,CAAuBC,UAAvB,EAAmCC,QAAnC,EAA6C1P,CAA7C,EAAgD;MACzD,CAACyP,WAAWnF,MAAX,GAAoB/H,MAAzB,EAAiC;WACxBkN,UAAP;;;MAGIE,wBAAwBxD,KAAKE,GAAL,CAAS,EAAT,EAAaqD,WAAW,IAAxB,CAA9B;MACME,cAAc5P,EAAE,aAAF,CAApB;;aAEWsK,MAAX,GAAoBhI,QAApB,GAA+BnC,IAA/B,CAAoC,UAACgB,KAAD,EAAQoH,OAAR,EAAoB;QAChDsH,WAAW7P,EAAEuI,OAAF,CAAjB;;QAEIqC,4BAA0BrL,IAA1B,CAA+BgJ,QAAQpF,OAAvC,CAAJ,EAAqD;aAC5C,IAAP;;;QAGI2M,eAAenE,SAASkE,QAAT,CAArB;QACIC,YAAJ,EAAkB;UACZD,aAAaJ,UAAjB,EAA6B;oBACf9L,MAAZ,CAAmBkM,QAAnB;OADF,MAEO;YACDE,eAAe,CAAnB;YACMC,UAAUC,YAAYJ,QAAZ,CAAhB;;;;YAIIG,UAAU,IAAd,EAAoB;0BACF,EAAhB;;;;;YAKEA,WAAW,GAAf,EAAoB;0BACF,EAAhB;;;;;YAKEH,SAASrP,IAAT,CAAc,OAAd,MAA2BiP,WAAWjP,IAAX,CAAgB,OAAhB,CAA/B,EAAyD;0BACvCkP,WAAW,GAA3B;;;YAGIQ,WAAWJ,eAAeC,YAAhC;;YAEIG,YAAYP,qBAAhB,EAAuC;iBAC9BC,YAAYjM,MAAZ,CAAmBkM,QAAnB,CAAP;SADF,MAEO,IAAItH,QAAQpF,OAAR,KAAoB,GAAxB,EAA6B;cAC5BgN,iBAAiBN,SAASpF,IAAT,EAAvB;cACM2F,uBAAuBpE,WAAWmE,cAAX,CAA7B;;cAEIC,uBAAuB,EAAvB,IAA6BJ,UAAU,IAA3C,EAAiD;mBACxCJ,YAAYjM,MAAZ,CAAmBkM,QAAnB,CAAP;WADF,MAEO,IAAIO,wBAAwB,EAAxB,IAA8BJ,YAAY,CAA1C,IACDX,eAAec,cAAf
,CADH,EACmC;mBACjCP,YAAYjM,MAAZ,CAAmBkM,QAAnB,CAAP;;;;;;WAMD,IAAP;GAnDF;;SAsDOD,WAAP;;;ACxEF;;AAEA,AAAe,SAASS,mBAAT,CAA0BrQ,CAA1B,EAA6B;MACtCyP,mBAAJ;MACIC,WAAW,CAAf;;IAEE,SAAF,EAAavP,IAAb,CAAkB,UAACgB,KAAD,EAAQd,IAAR,EAAiB;;QAE7BuK,4BAA0BrL,IAA1B,CAA+Bc,KAAK8C,OAApC,CAAJ,EAAkD;;;;QAI5C7C,QAAQN,EAAEK,IAAF,CAAd;QACMqL,QAAQC,SAASrL,KAAT,CAAd;;QAEIoL,QAAQgE,QAAZ,EAAsB;iBACThE,KAAX;mBACapL,KAAb;;GAXJ;;;;MAiBI,CAACmP,UAAL,EAAiB;WACRzP,EAAE,MAAF,KAAaA,EAAE,GAAF,EAAOsQ,KAAP,EAApB;;;eAGWd,cAAcC,UAAd,EAA0BC,QAA1B,EAAoC1P,CAApC,CAAb;;SAEOyP,UAAP;;;ACjCF,UACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACDA,SAASc,mBAAT,CAA6BjQ,KAA7B,EAAoCN,CAApC,EAAuCwQ,MAAvC,EAA+C;;;;;MAKzClQ,MAAMmQ,QAAN,CAAe,qBAAf,CAAJ,EAA2C;;;;MAIrCxO,UAAUsL,gBAAgBjN,MAAMmK,IAAN,EAAhB,CAAhB;;MAEIoB,YAAY5J,OAAZ,IAAuB,EAA3B,EAA+B;QACvByO,SAAS1Q,EAAE,GAAF,EAAOM,KAAP,EAAciC,MAA7B;QACMoO,aAAa3Q,EAAE,OAAF,EAAWM,KAAX,EAAkBiC,MAArC;;;QAGIoO,aAAcD,SAAS,CAA3B,EAA+B;YACvBhP,MAAN;;;;QAIIpC,gBAAgB2C,QAAQM,MAA9B;QACMqO,WAAW5Q,EAAE,KAAF,EAASM,KAAT,EAAgBiC,MAAjC;;;;QAIIjD,gBAAgB,EAAhB,IAAsBsR,aAAa,CAAvC,EAA0C;YAClClP,MAAN;;;;QAIIsO,UAAUC,YAAY3P,KAAZ,CAAhB;;;;;QAKIkQ,SAAS,EAAT,IAAeR,UAAU,GAAzB,IAAgC1Q,gBAAgB,EAApD,EAAwD;YAChDoC,MAAN;;;;;;QAME8O,UAAU,EAAV,IAAgBR,UAAU,GAA9B,EAAmC;;;;UAI3B7M,UAAU7C,MAAM/B,GAAN,CAAU,CAAV,EAAa4E,OAA7B;UACM0N,aAAa1N,YAAY,IAAZ,IAAoBA,YAAY,IAAnD;UACI0N,UAAJ,EAAgB;YACRC,eAAexQ,MAAMyQ,IAAN,EAArB;YACID,gBAAgBvD,gBAAgBuD,aAAarG,IAAb,EAAhB,EAAqC8B,KAArC,CAA2C,CAAC,CAA5C,MAAmD,GAAvE,EAA4E;;;;;YAKxE7K,MAAN;;;;QAIIsP,cAAchR,EAAE,QAAF,EAAYM,KAAZ,EAAmBiC,MAAvC;;;QAGIyO,cAAc,CAAd,IAAmB1R,gBAAgB,GAAvC,EAA4C;YACpCoC,MAAN;;;;;;;;;;;;;AAaN,AAAe,SAASuP,YAAT,CAAmBvH,QAAnB,EAA6B1J,CAA7B,EAAgC;IAC3CyG,wBAAF,EAA4BiD,QAA5B,EAAsCvJ,IAAtC,CAA2C,UAACgB,KAAD,EAAQd,IAAR,EAAiB;QACpDC,QAAQN,EAAEK,IAAF,CAAd;QACImQ,SAAS7E,SAASrL,KAAT,CAAb;QACI,CAACkQ,MAAL,EAAa;eACF7D,kBAAerM,KAAf,EAAsBN,CAAtB,CAAT;eACSM,KAAT,EAAgBN,CAAhB,EAAmBwQ,MAAnB;;;;QAIEA,SAAS,CAAb,EAAgB;YACR9O,MAAN;KADF,MAEO;;0BAEepB,KAApB,EAA2BN,CAA3B,EAA8BwQ
,MAA9B;;GAbJ;;SAiBOxQ,CAAP;;;ACrGa,SAASkR,YAAT,CAAsBxH,QAAtB,EAAgC1J,CAAhC,EAA+C;MAAZmR,KAAY,uEAAJ,EAAI;;IAC1DxK,eAAF,EAAmB+C,QAAnB,EAA6BvJ,IAA7B,CAAkC,UAACgB,KAAD,EAAQiQ,MAAR,EAAmB;QAC7CC,UAAUrR,EAAEoR,MAAF,CAAhB;;;;;QAKIpR,EAAEqR,OAAF,EAAW3H,QAAX,EAAqB4H,OAArB,CAA6B,GAA7B,EAAkC/O,MAAlC,KAA6C,CAAjD,EAAoD;aAC3C8O,QAAQ3P,MAAR,EAAP;;;;QAIE6L,gBAAgBvN,EAAEoR,MAAF,EAAU3G,IAAV,EAAhB,MAAsC0G,KAA1C,EAAiD;aACxCE,QAAQ3P,MAAR,EAAP;;;;;QAKE+J,UAAUzL,EAAEoR,MAAF,CAAV,IAAuB,CAA3B,EAA8B;aACrBC,QAAQ3P,MAAR,EAAP;;;WAGK2P,OAAP;GArBF;;SAwBOrR,CAAP;;;AC5BF;;;AAEA,AAAe,SAASuR,kBAAT,CAAyB3H,OAAzB,EAAkC5J,CAAlC,EAAqC;;;;MAI9CmJ,cAAcnJ,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CAAJ;MACImJ,cAAcnJ,EAAE,MAAF,CAAd,EAAyBA,CAAzB,EAA4B,KAA5B,CAAJ;;SAEOA,CAAP;;;ACTF,SAASwR,UAAT,CAAoBxR,CAApB,EAAuByR,OAAvB,EAAgCjR,IAAhC,EAAsCkR,QAAtC,EAAgD;UACxClR,IAAN,QAAekR,QAAf,EAAyBvR,IAAzB,CAA8B,UAACC,CAAD,EAAIC,IAAJ,EAAa;QACnCb,MAAMa,KAAKW,OAAL,CAAaR,IAAb,CAAZ;QACMmR,cAAcjS,IAAIjB,OAAJ,CAAYgT,OAAZ,EAAqBjS,GAArB,CAApB;;SAEKwB,OAAL,CAAaR,IAAb,IAAqBmR,WAArB;GAJF;;;AAQF,AAAe,SAASC,iBAAT,CAA2BF,QAA3B,EAAqC1R,CAArC,EAAwCR,GAAxC,EAA6C;GACzD,MAAD,EAAS,KAAT,EAAgByB,OAAhB,CAAwB;WAAQuQ,WAAWxR,CAAX,EAAcR,GAAd,EAAmBgB,IAAnB,EAAyBkR,QAAzB,CAAR;GAAxB;;SAEOA,QAAP;;;ACdK,SAAS1F,UAAT,CAAoBvB,IAApB,EAA0B;SACxBA,KAAKC,IAAL,GACK1G,OADL,CACa,MADb,EACqB,GADrB,EAEKzB,MAFZ;;;;;;AAQF,AAAO,SAAS0N,WAAT,CAAqB3P,KAArB,EAA4B;MAC3BuR,kBAAkB7F,WAAW1L,MAAMmK,IAAN,EAAX,CAAxB;;MAEMqH,WAAWxR,MAAMiB,IAAN,CAAW,GAAX,EAAgBkJ,IAAhB,EAAjB;MACMsH,aAAa/F,WAAW8F,QAAX,CAAnB;;MAEID,kBAAkB,CAAtB,EAAyB;WAChBE,aAAaF,eAApB;GADF,MAEO,IAAIA,oBAAoB,CAApB,IAAyBE,aAAa,CAA1C,EAA6C;WAC3C,CAAP;;;SAGK,CAAP;;;ACnBF;;;AAEA,AAAe,SAASC,kBAAT,CACbhS,CADa,EAEbiS,SAFa,EAGbC,WAHa,EAKb;MADAjB,YACA,uEADY,IACZ;;MACMkB,aAAaF,UAAUxQ,MAAV,CAAiB;WAAQyQ,YAAYE,OAAZ,CAAoBC,IAApB,MAA8B,CAAC,CAAvC;GAAjB,CAAnB;;;;;;;;UAEWA,IAHX;;UAIQjR,OAAO,MAAb;UACMb,QAAQ,OAAd;;UAEM+R,QAAQtS,YAAUoB,IAAV,UAAmBiR,IAAnB,QAAd;;;;;UAKME,SACJD,MAAMrM,GAAN,CAAU,UAAC9E,KAAD,EAAQd,IAAR;eAAiBL,EAAEK,IAAF,EAAQG,IAAR,
CAAaD,KAAb,CAAjB;OAAV,EACMiS,OADN,GAEM/Q,MAFN,CAEa;eAAQgJ,SAAS,EAAjB;OAFb,CADF;;;;;;UASI8H,OAAOhQ,MAAP,KAAkB,CAAtB,EAAyB;YACnBkQ,kBAAJ;;;YAGIxB,YAAJ,EAAe;sBACDyB,UAAUH,OAAO,CAAP,CAAV,EAAqBvS,CAArB,CAAZ;SADF,MAEO;sBACOuS,OAAO,CAAP,CAAZ;;;;aAGKE;;;;;sCA5BQN,UAAnB,4GAA+B;;;;;;;;;;;;;;;;;;;;;;SAiCxB,IAAP;;;AC3CF,SAASQ,UAAT,CAAoBrS,KAApB,EAA2BsS,WAA3B,EAAwC;;;MAGlCtS,MAAMgC,QAAN,GAAiBC,MAAjB,GAA0BqQ,WAA9B,EAA2C;WAClC,KAAP;;;MAGEC,cAAcvS,KAAd,CAAJ,EAA0B;WACjB,KAAP;;;SAGK,IAAP;;;;;;AAMF,AAAe,SAASwS,uBAAT,CACb9S,CADa,EAEb+S,SAFa,EAKb;MAFAH,WAEA,uEAFc,CAEd;MADAI,QACA,uEADW,IACX;;;;;;sCACuBD,SAAvB,4GAAkC;UAAvB7M,QAAuB;;UAC1BoM,QAAQtS,EAAEkG,QAAF,CAAd;;;;UAIIoM,MAAM/P,MAAN,KAAiB,CAArB,EAAwB;YAChBjC,QAAQN,EAAEsS,MAAM,CAAN,CAAF,CAAd;;YAEIK,WAAWrS,KAAX,EAAkBsS,WAAlB,CAAJ,EAAoC;cAC9B3Q,gBAAJ;cACI+Q,QAAJ,EAAc;sBACF1S,MAAMmK,IAAN,EAAV;WADF,MAEO;sBACKnK,MAAM2S,IAAN,EAAV;;;cAGEhR,OAAJ,EAAa;mBACJA,OAAP;;;;;;;;;;;;;;;;;;;;SAMD,IAAP;;;AChDF;AACA,AAAe,SAASyQ,SAAT,CAAmBjI,IAAnB,EAAyBzK,CAAzB,EAA4B;;;MAGnCkT,YAAYlT,aAAWyK,IAAX,cAA0BA,IAA1B,EAAlB;SACOyI,cAAc,EAAd,GAAmBzI,IAAnB,GAA0ByI,SAAjC;;;ACLa,SAASL,aAAT,CAAuBvS,KAAvB,EAA8B;MACrCgD,UAAUhD,MAAMgD,OAAN,GAAgBkP,OAAhB,EAAhB;MACMW,gBAAgB7P,QAAQ/B,IAAR,CAAa,UAAC+I,MAAD,EAAY;QACvCvC,aAAgBuC,OAAOtJ,OAAP,CAAeoS,KAA/B,SAAwC9I,OAAOtJ,OAAP,CAAe8G,EAA7D;WACOC,WAAW7F,QAAX,CAAoB,SAApB,CAAP;GAFoB,CAAtB;;SAKOiR,kBAAkBE,SAAzB;;;ACPF;;;;AAIA,AAAe,SAASC,gBAAT,CAA0BhT,KAA1B,EAAiC;SACvCA,MAAMmK,IAAN,GAAaC,IAAb,GAAoBnI,MAApB,IAA8B,GAArC;;;ACHa,SAASgR,WAAT,CAAqBvT,CAArB,EAAwB;SAC9BA,EAAEmH,cAAF,EAAkB5E,MAAlB,GAA2B,CAAlC;;;ACHF,mBACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA,AACA;;ACtBA;AACA,AAAO,IAAMiR,kBAAkB,wCAAxB;;;;AAIP,AAAO,IAAMC,eAAe,IAAIrV,MAAJ,CAAW,aAAX,EAA0B,GAA1B,CAArB;;;;;;;;;;AAUP,AAAO;;;;;;;AAQP,AAAO;;;AAKP,AAAO,IAAMsV,iBAAiB,WAAvB;AACP,AAAO,IAAMC,kBAAkB,WAAxB;AACP,AAAO,IAAMC,uBAAuB,4BAA7B;AACP,AAAO,IAAMC,yBAAyB,oBAA/B;AACP,AAAO,IAAMC,wBAAwB,QAA9B;AACP,IAAMC,SAAS,CACb,KADa,EAEb,KAFa,EAGb,KAH
a,EAIb,KAJa,EAKb,KALa,EAMb,KANa,EAOb,KAPa,EAQb,KARa,EASb,KATa,EAUb,KAVa,EAWb,KAXa,EAYb,KAZa,CAAf;AAcA,IAAMC,YAAYD,OAAO1V,IAAP,CAAY,GAAZ,CAAlB;AACA,IAAM4V,aAAa,qCAAnB;AACA,IAAMC,aAAa,wCAAnB;AACA,AAAO,IAAMC,oBACX,IAAI/V,MAAJ,OAAe6V,UAAf,WAA+BC,UAA/B,wBAA4DF,SAA5D,QAA0E,IAA1E,CADK;;;;;AAMP,AAAO,IAAMI,qBAAqB,gBAA3B;;AAEP,AAAO,IAAMC,oBACX,IAAIjW,MAAJ,CAAW,2BAAX,EAAwC,GAAxC,CADK;;ACxDP;;AAEA,AAAe,SAASkW,WAAT,CAAqBC,MAArB,EAA6B;SACnCA,OAAOvQ,OAAP,CAAewP,eAAf,EAAgC,IAAhC,EAAsC9I,IAAtC,EAAP;;;ACHa,SAAS/I,OAAT,CAAe6S,YAAf,EAA6B;iBAC3BA,aAAa9J,IAAb,EAAf;MACI+J,SAASC,QAAT,CAAkBF,YAAlB,CAAJ,EAAqC;WAC5BA,YAAP;;;SAGK,IAAP;;;ACHF;;AAEA,AAAe,SAASG,QAAT,CAAkBC,GAAlB,QAAuC;MAAd5U,CAAc,QAAdA,CAAc;MAAX6U,OAAW,QAAXA,OAAW;;;MAEhDD,IAAIrS,MAAJ,GAAa,IAAb,IAAqBqS,IAAIrS,MAAJ,GAAa,CAAtC,EAAyC,OAAO,IAAP;;;MAGrCsS,WAAWvF,eAAeuF,OAAf,EAAwB,EAAxB,MAAgCvF,eAAesF,GAAf,EAAoB,EAApB,CAA/C,EAAwE,OAAO,IAAP;;MAElEE,UAAUpC,UAAUkC,GAAV,EAAe5U,CAAf,CAAhB;;;;MAIIyT,aAAalU,IAAb,CAAkBuV,OAAlB,CAAJ,EAAgC,OAAO,IAAP;;SAEzBA,QAAQpK,IAAR,EAAP;;;ACnBF;;;;AAIA,AASA,AAAO,SAASqK,eAAT,CAAyBC,UAAzB,EAAqC;SACnC,CAACA,WAAWzP,KAAX,CAAiB4O,iBAAjB,KAAuC,EAAxC,EACW9V,IADX,CACgB,GADhB,EAEW2F,OAFX,CAEmB8P,qBAFnB,EAE0C,GAF1C,EAGW9P,OAHX,CAGmB6P,sBAHnB,EAG2C,UAH3C,EAIW7P,OAJX,CAImB4P,oBAJnB,EAIyC,IAJzC,EAKWlJ,IALX,EAAP;;;;;AAUF,AAAe,SAASuK,kBAAT,CAA4BD,UAA5B,EAAwC;;MAEjDtB,eAAenU,IAAf,CAAoByV,UAApB,KAAmCrB,gBAAgBpU,IAAhB,CAAqByV,UAArB,CAAvC,EAAyE;iBAC1DzL,SAASyL,UAAT,EAAqB,EAArB,CAAb;;;MAGEE,OAAOC,OAAO,IAAIC,IAAJ,CAASJ,UAAT,CAAP,CAAX;;MAEI,CAACE,KAAKG,OAAL,EAAL,EAAqB;iBACNN,gBAAgBC,UAAhB,CAAb;WACOG,OAAO,IAAIC,IAAJ,CAASJ,UAAT,CAAP,CAAP;;;SAGKE,KAAKG,OAAL,KAAiBH,KAAKI,WAAL,EAAjB,GAAsC,IAA7C;;;ACzBF;;AACA,AAAe,SAASC,gBAAT,CACb3L,OADa,QASb;MANE5J,CAMF,QANEA,CAMF;mCALEwV,kBAKF;MALEA,kBAKF,yCALuB,IAKvB;wBAJErE,KAIF;MAJEA,KAIF,8BAJU,EAIV;sBAHE3R,GAGF;MAHEA,GAGF,4BAHQ,EAGR;iCAFEiW,cAEF;MAFEA,cAEF,uCAFmB,IAEnB;;;;qBAGgB7L,OAAhB,EAAyB5J,CAAzB;;;;;MAKIyV,cAAJ,EAAoBhM,YAAYG,OAAZ,EAAqB5J,CAArB;;;;;aAKT4J,OAAX,EAAoB5J,CAApB,EAAuBR,GAAvB;;;;gBAI
coK,OAAd,EAAuB5J,CAAvB;;;;;gBAKW4J,OAAX,EAAoB5J,CAApB;;;eAGa4J,OAAb,EAAsB5J,CAAtB,EAAyBmR,KAAzB;;;oBAGkBvH,OAAlB,EAA2B5J,CAA3B,EAA8BR,GAA9B;;;;;;MAMIiW,cAAJ,EAAoBxE,aAAUrH,OAAV,EAAmB5J,CAAnB,EAAsBwV,kBAAtB;;;cAGR5L,OAAZ,EAAqB5J,CAArB;;;kBAGgB4J,OAAhB,EAAyB5J,CAAzB;;SAEO4J,OAAP;;;AC5Da,SAAS8L,aAAT,CAAoBvE,KAApB,QAAuC;MAAV3R,GAAU,QAAVA,GAAU;MAALQ,CAAK,QAALA,CAAK;;;;MAGhDoU,mBAAmB7U,IAAnB,CAAwB4R,KAAxB,CAAJ,EAAoC;YAC1BwE,kBAAkBxE,KAAlB,EAAyB3R,GAAzB,CAAR;;;;;MAKE2R,MAAM5O,MAAN,GAAe,GAAnB,EAAwB;;QAEhBqT,KAAK5V,EAAE,IAAF,CAAX;QACI4V,GAAGrT,MAAH,KAAc,CAAlB,EAAqB;cACXqT,GAAGnL,IAAH,EAAR;;;;;SAKGiI,UAAUvB,KAAV,EAAiBnR,CAAjB,EAAoB0K,IAApB,EAAP;;;ACfF,SAASmL,sBAAT,CAAgCC,UAAhC,EAA4CrL,IAA5C,EAAkD;;;;MAI5CqL,WAAWvT,MAAX,IAAqB,CAAzB,EAA4B;;;;;UAIpBwT,aAAaD,WAAWnT,MAAX,CAAkB,UAACC,GAAD,EAAMoT,SAAN,EAAoB;YACnDA,SAAJ,IAAiBpT,IAAIoT,SAAJ,IAAiBpT,IAAIoT,SAAJ,IAAiB,CAAlC,GAAsC,CAAvD;eACOpT,GAAP;OAFiB,EAGhB,EAHgB,CAAnB;;kCAME,iBAAgBmT,UAAhB,EACQpT,MADR,CACe,UAACC,GAAD,EAAM8C,GAAN,EAAc;YAChB9C,IAAI,CAAJ,IAASmT,WAAWrQ,GAAX,CAAb,EAA8B;iBACrB,CAACA,GAAD,EAAMqQ,WAAWrQ,GAAX,CAAN,CAAP;;;eAGK9C,GAAP;OANT,EAOU,CAAC,CAAD,EAAI,CAAJ,CAPV,CAVwB;;UASnBqT,OATmB;UASVC,SATU;;;;;;;;UAuBtBA,aAAa,CAAb,IAAkBD,QAAQ1T,MAAR,IAAkB,CAAxC,EAA2C;qBAC5BkI,KAAK7F,KAAL,CAAWqR,OAAX,CAAb;;;UAGIE,YAAY,CAACL,WAAW,CAAX,CAAD,EAAgBA,WAAWvJ,KAAX,CAAiB,CAAC,CAAlB,CAAhB,CAAlB;UACM6J,aAAaD,UAAUxT,MAAV,CAAiB,UAACC,GAAD,EAAMhF,GAAN;eAAcgF,IAAIL,MAAJ,GAAa3E,IAAI2E,MAAjB,GAA0BK,GAA1B,GAAgChF,GAA9C;OAAjB,EAAoE,EAApE,CAAnB;;UAEIwY,WAAW7T,MAAX,GAAoB,EAAxB,EAA4B;;aACnB6T;;;;;WAGF3L;;;;;;;SAGF,IAAP;;;AAGF,SAAS4L,oBAAT,CAA8BP,UAA9B,EAA0CtW,GAA1C,EAA+C;;;;;;;mBAO5BE,IAAIC,KAAJ,CAAUH,GAAV,CAP4B;MAOrCoP,IAPqC,cAOrCA,IAPqC;;MAQvC0H,cAAc1H,KAAK5K,OAAL,CAAaqQ,iBAAb,EAAgC,EAAhC,CAApB;;MAEMkC,YAAYT,WAAW,CAAX,EAAcrH,WAAd,GAA4BzK,OAA5B,CAAoC,GAApC,EAAyC,EAAzC,CAAlB;MACMwS,iBAAiBC,MAAMC,WAAN,CAAkBH,SAAlB,EAA6BD,WAA7B,CAAvB;;MAEIE,iBAAiB,GAAjB,IAAwBD,UAAUhU,MAAV,GAAmB,CAA/C,EAAkD;WACzCuT,WAAWvJ,KAAX,CAAiB,CAAjB,EAAoBlO,IAApB,CAAyB,EAAzB,CAAP;;;MAG
IsY,UAAUb,WAAWvJ,KAAX,CAAiB,CAAC,CAAlB,EAAqB,CAArB,EAAwBkC,WAAxB,GAAsCzK,OAAtC,CAA8C,GAA9C,EAAmD,EAAnD,CAAhB;MACM4S,eAAeH,MAAMC,WAAN,CAAkBC,OAAlB,EAA2BL,WAA3B,CAArB;;MAEIM,eAAe,GAAf,IAAsBD,QAAQpU,MAAR,IAAkB,CAA5C,EAA+C;WACtCuT,WAAWvJ,KAAX,CAAiB,CAAjB,EAAoB,CAAC,CAArB,EAAwBlO,IAAxB,CAA6B,EAA7B,CAAP;;;SAGK,IAAP;;;;;AAKF,AAAe,SAASsX,iBAAT,CAA2BxE,KAA3B,EAA4C;MAAV3R,GAAU,uEAAJ,EAAI;;;;MAGnDsW,aAAa3E,MAAMvM,KAAN,CAAYwP,kBAAZ,CAAnB;MACI0B,WAAWvT,MAAX,KAAsB,CAA1B,EAA6B;WACpB4O,KAAP;;;MAGE0F,WAAWhB,uBAAuBC,UAAvB,EAAmC3E,KAAnC,CAAf;MACI0F,QAAJ,EAAc,OAAOA,QAAP;;aAEHR,qBAAqBP,UAArB,EAAiCtW,GAAjC,CAAX;MACIqX,QAAJ,EAAc,OAAOA,QAAP;;;;SAIP1F,KAAP;;;AC1FF,IAAM2F,WAAW;UACPxC,WADO;kBAECyC,OAFD;OAGVpC,QAHU;kBAICM,kBAJD;WAKN+B,gBALM;SAMRtB;CANT,CASA,AAEA,AACA,AACA,AACA,AACA,AACA,AACA;;ACdA;;;;;;;;;;;AAWA,AAAe,SAASuB,eAAT,CAAyBjX,CAAzB,EAA4BkX,IAA5B,EAAkC;;;;;;MAM3CA,KAAKvP,uBAAT,EAAkC;QAC5BA,wBAAwB3H,CAAxB,CAAJ;;;MAGEkJ,uBAAoBlJ,CAApB,CAAJ;MACImN,gBAAanN,CAAb,EAAgBkX,KAAKpK,WAArB,CAAJ;MACMqK,gBAAgB9G,oBAAiBrQ,CAAjB,CAAtB;;SAEOmX,aAAP;;;AC3BF,IAAMC,0BAA0B;eACjB;6BACc,IADd;iBAEE,IAFF;wBAGS;GAJQ;;;;;;;;;;;;;;;;;;;;;SAAA,yBA0BGF,IA1BH,EA0BS;QAA7BlX,CAA6B,QAA7BA,CAA6B;QAA1BiT,IAA0B,QAA1BA,IAA0B;QAApB9B,KAAoB,QAApBA,KAAoB;QAAb3R,GAAa,QAAbA,GAAa;;wBACzB,KAAK6X,WAAjB,EAAiCH,IAAjC;;QAEIlX,KAAKmC,QAAQC,IAAR,CAAa6Q,IAAb,CAAT;;;;QAII5S,OAAO,KAAKiX,cAAL,CAAoBtX,CAApB,EAAuBmR,KAAvB,EAA8B3R,GAA9B,EAAmC0X,IAAnC,CAAX;;QAEI5D,iBAAiBjT,IAAjB,CAAJ,EAA4B;aACnB,KAAKkX,kBAAL,CAAwBlX,IAAxB,EAA8BL,CAA9B,CAAP;;;;;;;;;;wCAKgB,iBAAgBkX,IAAhB,EAAsBzV,MAAtB,CAA6B;eAAKyV,KAAKM,CAAL,MAAY,IAAjB;OAA7B,CAAlB,4GAAuE;YAA5D9R,GAA4D;;aAChEA,GAAL,IAAY,KAAZ;YACIvD,QAAQC,IAAR,CAAa6Q,IAAb,CAAJ;;eAEO,KAAKqE,cAAL,CAAoBtX,CAApB,EAAuBmR,KAAvB,EAA8B3R,GAA9B,EAAmC0X,IAAnC,CAAP;;YAEI5D,iBAAiBjT,IAAjB,CAAJ,EAA4B;;;;;;;;;;;;;;;;;;;WAKvB,KAAKkX,kBAAL,CAAwBlX,IAAxB,EAA8BL,CAA9B,CAAP;GApD4B;;;;gBAAA,0BAwDfA,CAxDe,EAwDZmR,KAxDY,EAwDL3R,GAxDK,EAwDA0X,IAxDA,EAwDM;WAC3BF,iBACGC,gBAAgBjX,CAAhB,EAAmBkX,IAAnB,CADH,EAEL;UAAA;0BAEsBA,KAAK1B,kBAF3B;
kBAAA;;KAFK,CAAP;GAzD4B;;;;;;oBAAA,8BAsEXnV,IAtEW,EAsELL,CAtEK,EAsEF;QACtB,CAACK,IAAL,EAAW;aACF,IAAP;;;WAGKkN,gBAAgBvN,EAAEiT,IAAF,CAAO5S,IAAP,CAAhB,CAAP;;;;;;;CA3EJ,CAqFA;;AC7FA;;;;;;;AAOA,AAAO,IAAMoX,yBAAyB,CACpC,iBADoC,EAEpC,UAFoC,EAGpC,SAHoC,EAIpC,UAJoC,EAKpC,OALoC,CAA/B;;;;AAUP,AAAO,IAAMC,uBAAuB,CAClC,UADkC,CAA7B;;;;;;;;;AAWP,AAAO,IAAMC,yBAAyB,CACpC,sBADoC,EAEpC,kBAFoC,EAGpC,kBAHoC,EAIpC,YAJoC,EAKpC,mBALoC,EAMpC,cANoC,CAA/B;;AASP,AAAO,IAAMC,uBAAuB,CAClC,YADkC,EAElC,cAFkC,EAGlC,cAHkC,EAIlC,aAJkC,EAKlC,aALkC,EAMlC,aANkC,EAOlC,aAPkC,EAQlC,eARkC,EASlC,eATkC,EAUlC,iBAVkC,EAWlC,UAXkC,EAYlC,YAZkC,EAalC,IAbkC,EAclC,iBAdkC,EAelC,OAfkC,CAA7B;;ACxBP,IAAMC,wBAAwB;SAAA,yBACG;QAArB7X,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAbsY,SAAa,QAAbA,SAAa;;;;QAGzB3G,cAAJ;;YAEQa,mBAAgBhS,CAAhB,EAAmByX,sBAAnB,EAA2CK,SAA3C,CAAR;QACI3G,KAAJ,EAAW,OAAOuE,cAAWvE,KAAX,EAAkB,EAAE3R,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;;YAIH8S,wBAAqB9S,CAArB,EAAwB2X,sBAAxB,CAAR;QACIxG,KAAJ,EAAW,OAAOuE,cAAWvE,KAAX,EAAkB,EAAE3R,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;YAGHgS,mBAAgBhS,CAAhB,EAAmB0X,oBAAnB,EAAyCI,SAAzC,CAAR;QACI3G,KAAJ,EAAW,OAAOuE,cAAWvE,KAAX,EAAkB,EAAE3R,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;YAGH8S,wBAAqB9S,CAArB,EAAwB4X,oBAAxB,CAAR;QACIzG,KAAJ,EAAW,OAAOuE,cAAWvE,KAAX,EAAkB,EAAE3R,QAAF,EAAOQ,IAAP,EAAlB,CAAP;;;WAGJ,EAAP;;CAvBJ,CA2BA;;ACxCA;;;;;;AAMA,AAAO,IAAM+X,mBAAmB,CAC9B,KAD8B,EAE9B,OAF8B,EAG9B,WAH8B,EAI9B,eAJ8B,EAK9B,YAL8B,EAM9B,WAN8B,EAO9B,SAP8B,CAAzB;;AAUP,AAAO,IAAMC,oBAAoB,GAA1B;;;;;;;;;AASP,AAAO,IAAMC,mBAAmB,CAC9B,sBAD8B,EAE9B,mBAF8B,EAG9B,oBAH8B,EAI9B,mBAJ8B,EAK9B,oBAL8B,EAM9B,qBAN8B,EAO9B,aAP8B,EAQ9B,iBAR8B,EAS9B,oBAT8B,EAU9B,qBAV8B,EAW9B,eAX8B,EAY9B,YAZ8B,EAa9B,YAb8B,EAc9B,cAd8B,EAe9B,cAf8B,EAgB9B,yBAhB8B,EAiB9B,qBAjB8B,EAkB9B,qBAlB8B,EAmB9B,SAnB8B,EAoB9B,SApB8B,EAqB9B,gBArB8B,EAsB9B,gBAtB8B,EAuB9B,SAvB8B,CAAzB;;;;AA4BP,IAAMC,WAAW,aAAjB;AACA,AAAO,IAAMC,sBAAsB,CACjC,CAAC,SAAD,EAAYD,QAAZ,CADiC,EAEjC,CAAC,SAAD,EAAYA,QAAZ,CAFiC,CAA5B;;ACzCP,IAAME,yBAAyB;SAAA,yBACH;QAAhBpY,CAAgB,QAAhBA,CAAgB;QAAb8X,SAAa,QAAbA,SAAa;;
QACpBvD,eAAJ;;;;aAISvC,mBAAgBhS,CAAhB,EAAmB+X,gBAAnB,EAAqCD,SAArC,CAAT;QACIvD,UAAUA,OAAOhS,MAAP,GAAgByV,iBAA9B,EAAiD;aACxC1D,YAAYC,MAAZ,CAAP;;;;aAIOzB,wBAAqB9S,CAArB,EAAwBiY,gBAAxB,EAA0C,CAA1C,CAAT;QACI1D,UAAUA,OAAOhS,MAAP,GAAgByV,iBAA9B,EAAiD;aACxC1D,YAAYC,MAAZ,CAAP;;;;;;;;;;wCAK8B4D,mBAAhC,4GAAqD;;;;;YAAzCjS,QAAyC;YAA/BmS,KAA+B;;YAC7ChY,OAAOL,EAAEkG,QAAF,CAAb;YACI7F,KAAKkC,MAAL,KAAgB,CAApB,EAAuB;cACfkI,OAAOpK,KAAKoK,IAAL,EAAb;cACI4N,MAAM9Y,IAAN,CAAWkL,IAAX,CAAJ,EAAsB;mBACb6J,YAAY7J,IAAZ,CAAP;;;;;;;;;;;;;;;;;;;WAKC,IAAP;;CA7BJ,CAiCA;;AC9CA;;;;AAIA,AAAO,IAAM6N,2BAA2B,CACtC,wBADsC,EAEtC,aAFsC,EAGtC,SAHsC,EAItC,gBAJsC,EAKtC,WALsC,EAMtC,cANsC,EAOtC,UAPsC,EAQtC,UARsC,EAStC,SATsC,EAUtC,eAVsC,EAWtC,UAXsC,EAYtC,cAZsC,EAatC,qBAbsC,EActC,cAdsC,EAetC,SAfsC,EAgBtC,MAhBsC,CAAjC;;;;;AAsBP,AAAO,IAAMC,2BAA2B,CACtC,4BADsC,EAEtC,oBAFsC,EAGtC,0BAHsC,EAItC,kBAJsC,EAKtC,oBALsC,EAMtC,kBANsC,EAOtC,iBAPsC,EAQtC,aARsC,EAStC,eATsC,EAUtC,qBAVsC,EAWtC,mBAXsC,EAYtC,cAZsC,EAatC,aAbsC,EActC,YAdsC,EAetC,kBAfsC,EAgBtC,WAhBsC,EAiBtC,UAjBsC,CAAjC;;;;;AAuBP,IAAMC,kBAAkB,mDAAxB;AACA,AAAO,IAAMC,yBAAyB;;AAEpC,IAAIra,MAAJ,CAAW,4BAAX,EAAyC,GAAzC,CAFoC;;;;AAMpC,IAAIA,MAAJ,CAAW,6BAAX,EAA0C,GAA1C,CANoC;;AAQpC,IAAIA,MAAJ,iBAAyBoa,eAAzB,kBAAuD,GAAvD,CARoC,CAA/B;;ACrCP,IAAME,gCAAgC;SAAA,yBACL;QAArB1Y,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAbsY,SAAa,QAAbA,SAAa;;QACzBa,sBAAJ;;;;oBAIgB3G,mBAAgBhS,CAAhB,EAAmBsY,wBAAnB,EAA6CR,SAA7C,EAAwD,KAAxD,CAAhB;QACIa,aAAJ,EAAmB,OAAO1D,mBAAmB0D,aAAnB,CAAP;;;;oBAIH7F,wBAAqB9S,CAArB,EAAwBuY,wBAAxB,CAAhB;QACII,aAAJ,EAAmB,OAAO1D,mBAAmB0D,aAAnB,CAAP;;;oBAGHnL,eAAehO,GAAf,EAAoBiZ,sBAApB,CAAhB;QACIE,aAAJ,EAAmB,OAAO1D,mBAAmB0D,aAAnB,CAAP;;WAEZ,IAAP;;CAlBJ,CAsBA;;ACnCA;;;;;;;;;;;;;;;;;AAiBA,IAAMC,sBAAsB;;SAAA,qBAEhB;WACD,IAAP;;CAHJ;;AAOA;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACxBA;;;AAGA,AAAO,IAAMC,2BAA2B,CACtC,UADsC,EAEtC,eAFsC,EAGtC,WAHsC,CAAjC;;AAMP,AAAO,IAAMC,2BAA2B,CACtC,qBADsC,CAAjC;;AAIP,AAAO,IAAMC,gCAAgC,CAC3C,QAD2C,EAE3C,YAF2C,EAG3C,OAH2C,EAI3C,OAJ2C,EAK3C,UAL2C,CAAtC;AAOP,AAAO,IAA
MC,mCAAmC,IAAI5a,MAAJ,CAAW2a,8BAA8B1a,IAA9B,CAAmC,GAAnC,CAAX,EAAoD,GAApD,CAAzC;;AAEP,AAAO,IAAM4a,gCAAgC,CAC3C,QAD2C,EAE3C,QAF2C,EAG3C,OAH2C,EAI3C,UAJ2C,EAK3C,UAL2C,EAM3C,MAN2C,EAO3C,IAP2C,EAQ3C,YAR2C,EAS3C,MAT2C,EAU3C,QAV2C,EAW3C,QAX2C,EAY3C,KAZ2C,EAa3C,QAb2C,EAc3C,SAd2C,EAe3C,QAf2C,EAgB3C,SAhB2C,EAiB3C,SAjB2C,EAkB3C,QAlB2C,EAmB3C,OAnB2C,EAoB3C,UApB2C,EAqB3C,SArB2C,EAsB3C,OAtB2C,EAuB3C,OAvB2C,EAwB3C,KAxB2C,EAyB3C,aAzB2C,CAAtC;AA2BP,AAAO,IAAMC,mCAAmC,IAAI9a,MAAJ,CAAW6a,8BAA8B5a,IAA9B,CAAmC,GAAnC,CAAX,EAAoD,GAApD,CAAzC;;AAEP,AAAO,IAAM8a,SAAS,gBAAf;AACP,AAAO,IAAMC,SAAS,kBAAf;;AC3CP,SAASC,MAAT,CAAgB/Y,KAAhB,EAAuB;UACXA,MAAME,IAAN,CAAW,OAAX,KAAuB,EAAjC,WAAuCF,MAAME,IAAN,CAAW,IAAX,KAAoB,EAA3D;;;;AAIF,AAAO,SAAS8Y,aAAT,CAAuB9Z,GAAvB,EAA4B;QAC3BA,IAAIkL,IAAJ,EAAN;MACIgB,QAAQ,CAAZ;;MAEIsN,iCAAiCzZ,IAAjC,CAAsCC,GAAtC,CAAJ,EAAgD;aACrC,EAAT;;;MAGE0Z,iCAAiC3Z,IAAjC,CAAsCC,GAAtC,CAAJ,EAAgD;aACrC,EAAT;;;;;MAKE2Z,OAAO5Z,IAAP,CAAYC,GAAZ,CAAJ,EAAsB;aACX,EAAT;;;MAGE4Z,OAAO7Z,IAAP,CAAYC,GAAZ,CAAJ,EAAsB;aACX,EAAT;;;;;SAKKkM,KAAP;;;;AAIF,AAAO,SAAS6N,SAAT,CAAmBrU,IAAnB,EAAyB;MAC1BA,KAAK1E,IAAL,CAAU,KAAV,CAAJ,EAAsB;WACb,CAAP;;;SAGK,CAAP;;;;;AAKF,AAAO,SAASgZ,cAAT,CAAwBtU,IAAxB,EAA8B;MAC/BwG,QAAQ,CAAZ;MACM+N,aAAavU,KAAK5B,OAAL,CAAa,QAAb,EAAuBgN,KAAvB,EAAnB;;MAEImJ,WAAWlX,MAAX,KAAsB,CAA1B,EAA6B;aAClB,EAAT;;;MAGIc,UAAU6B,KAAKoF,MAAL,EAAhB;MACIoP,iBAAJ;MACIrW,QAAQd,MAAR,KAAmB,CAAvB,EAA0B;eACbc,QAAQiH,MAAR,EAAX;;;GAGDjH,OAAD,EAAUqW,QAAV,EAAoBzY,OAApB,CAA4B,UAACX,KAAD,EAAW;QACjCyK,iBAAexL,IAAf,CAAoB8Z,OAAO/Y,KAAP,CAApB,CAAJ,EAAwC;eAC7B,EAAT;;GAFJ;;SAMOoL,KAAP;;;;;AAKF,AAAO,SAASiO,cAAT,CAAwBzU,IAAxB,EAA8B;MAC/BwG,QAAQ,CAAZ;MACMmE,WAAW3K,KAAKkD,IAAL,EAAjB;MACMG,UAAUsH,SAAStR,GAAT,CAAa,CAAb,CAAhB;;MAEIgK,WAAWA,QAAQpF,OAAR,KAAoB,YAAnC,EAAiD;aACtC,EAAT;;;MAGE4H,iBAAexL,IAAf,CAAoB8Z,OAAOxJ,QAAP,CAApB,CAAJ,EAA2C;aAChC,EAAT;;;SAGKnE,KAAP;;;AAGF,AAAO,SAASkO,iBAAT,CAA2B1U,IAA3B,EAAiC;MAClCwG,QAAQ,CAAZ;;MAEM3H,QAAQ6H,WAAW1G,KAAK1E,IAAL,CAAU,OAAV,CAAX,CAAd;MACM8I,SAASsC,WAAW1G,KAAK1E,IAAL,CAAU,QAAV,CAAX,CAAf;
MACMsD,MAAMoB,KAAK1E,IAAL,CAAU,KAAV,CAAZ;;;MAGIuD,SAASA,SAAS,EAAtB,EAA0B;aACf,EAAT;;;;MAIEuF,UAAUA,UAAU,EAAxB,EAA4B;aACjB,EAAT;;;MAGEvF,SAASuF,MAAT,IAAmB,CAACxF,IAAI5B,QAAJ,CAAa,QAAb,CAAxB,EAAgD;QACxC2X,OAAO9V,QAAQuF,MAArB;QACIuQ,OAAO,IAAX,EAAiB;;eACN,GAAT;KADF,MAEO;eACI1N,KAAK2N,KAAL,CAAWD,OAAO,IAAlB,CAAT;;;;SAIGnO,KAAP;;;AAGF,AAAO,SAASqO,eAAT,CAAyBC,KAAzB,EAAgC7Y,KAAhC,EAAuC;SACpC6Y,MAAMzX,MAAN,GAAe,CAAhB,GAAqBpB,KAA5B;;;AC1GF;;;;;;;;AAQA,IAAM8Y,+BAA+B;SAAA,yBACA;QAAzBja,CAAyB,QAAzBA,CAAyB;QAAtBiC,OAAsB,QAAtBA,OAAsB;QAAb6V,SAAa,QAAbA,SAAa;;QAC7BoC,iBAAJ;;;;;;QAMMC,WACJnI,mBACEhS,CADF,EAEE6Y,wBAFF,EAGEf,SAHF,EAIE,KAJF,CADF;;QAQIqC,QAAJ,EAAc;iBACDpD,QAAWoD,QAAX,CAAX;;UAEID,QAAJ,EAAc,OAAOA,QAAP;;;;;;QAMVE,OAAOpa,EAAE,KAAF,EAASiC,OAAT,EAAkBuQ,OAAlB,EAAb;QACM6H,YAAY,EAAlB;;SAEKpZ,OAAL,CAAa,UAACF,GAAD,EAAMI,KAAN,EAAgB;UACrB+D,OAAOlF,EAAEe,GAAF,CAAb;UACM+C,MAAMoB,KAAK1E,IAAL,CAAU,KAAV,CAAZ;;UAEI,CAACsD,GAAL,EAAU;;UAEN4H,QAAQ4N,cAAcxV,GAAd,CAAZ;eACSyV,UAAUrU,IAAV,CAAT;eACSsU,eAAetU,IAAf,CAAT;eACSyU,eAAezU,IAAf,CAAT;eACS0U,kBAAkB1U,IAAlB,CAAT;eACS6U,gBAAgBK,IAAhB,EAAsBjZ,KAAtB,CAAT;;gBAEU2C,GAAV,IAAiB4H,KAAjB;KAbF;;gCAiBE,iBAAgB2O,SAAhB,EAA2B1X,MAA3B,CAAkC,UAACC,GAAD,EAAM8C,GAAN;aAChC2U,UAAU3U,GAAV,IAAiB9C,IAAI,CAAJ,CAAjB,GAA0B,CAAC8C,GAAD,EAAM2U,UAAU3U,GAAV,CAAN,CAA1B,GAAkD9C,GADlB;KAAlC,EAEE,CAAC,IAAD,EAAO,CAAP,CAFF,CA5C+B;;QA2C1B0X,MA3C0B;QA2ClB5K,QA3CkB;;QAgD7BA,WAAW,CAAf,EAAkB;iBACLqH,QAAWuD,MAAX,CAAX;;UAEIJ,QAAJ,EAAc,OAAOA,QAAP;;;;;;;;;;wCAKOpB,wBAAvB,4GAAiD;YAAtC5S,QAAsC;;YACzC5F,QAAQN,EAAEkG,QAAF,EAAYoK,KAAZ,EAAd;YACMxM,MAAMxD,MAAME,IAAN,CAAW,KAAX,CAAZ;YACIsD,GAAJ,EAAS;qBACIiT,QAAWjT,GAAX,CAAX;cACIoW,QAAJ,EAAc,OAAOA,QAAP;;;YAGVK,OAAOja,MAAME,IAAN,CAAW,MAAX,CAAb;YACI+Z,IAAJ,EAAU;qBACGxD,QAAWwD,IAAX,CAAX;cACIL,QAAJ,EAAc,OAAOA,QAAP;;;YAGV3Z,QAAQD,MAAME,IAAN,CAAW,OAAX,CAAd;YACID,KAAJ,EAAW;qBACEwW,QAAWxW,KAAX,CAAX;cACI2Z,QAAJ,EAAc,OAAOA,QAAP;;;;;;;;;;;;;;;;;;WAIX,IAAP;;CA9EJ;;AAkFA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;ACzGe,SAASM,eAAT,CAAyB9O,KAAzB,EAAgC+O,UAAhC,EAA4CF,IAA5C,EAAkD;;;;;;MAM3D7O,QAAQ,CAAZ,EAAe;QACPgP,aAAa,IAAIC,QAAQC,eAAZ,CAA4B,IAA5B,EAAkCH,UAAlC,EAA8CF,IAA9C,EAAoDM,KAApD,EAAnB;;;;;;;QAOMC,cAAc,MAAMJ,UAA1B;QACMK,eAAe,EAAE,OAAOD,cAAc,GAArB,CAAF,CAArB;WACOpP,QAAQqP,YAAf;;;SAGK,CAAP;;;ACnBa,SAASC,aAAT,CAAuBlJ,QAAvB,EAAiC3D,OAAjC,EAA0C;;;;;MAKnDzC,QAAQ,CAAZ;;MAEIsC,YAAYzO,IAAZ,CAAiBuS,SAASpH,IAAT,EAAjB,CAAJ,EAAuC;QAC/BuQ,gBAAgB1R,SAASuI,QAAT,EAAmB,EAAnB,CAAtB;;;;QAIImJ,gBAAgB,CAApB,EAAuB;cACb,CAAC,EAAT;KADF,MAEO;cACG9O,KAAKE,GAAL,CAAS,CAAT,EAAY,KAAK4O,aAAjB,CAAR;;;;;;QAME9M,WAAWA,WAAW8M,aAA1B,EAAyC;eAC9B,EAAT;;;;SAIGvP,KAAP;;;AC5Ba,SAASwP,eAAT,CAAyB/M,OAAzB,EAAkCgN,IAAlC,EAAwC;;;;MAIjDhN,WAAW,CAACgN,IAAhB,EAAsB;WACb,EAAP;;;SAGK,CAAP;;;ACRK,IAAMlQ,aAAW,IAAjB;;;;AAIP,AAAO,IAAMmQ,0BAAwB,CACnC,OADmC,EAEnC,SAFmC,EAGnC,SAHmC,EAInC,SAJmC,EAKnC,QALmC,EAMnC,OANmC,EAOnC,OAPmC,EAQnC,OARmC,EASnC,KATmC,EAUnC,OAVmC,EAWnC,MAXmC,EAYnC,QAZmC,EAanC,KAbmC,EAcnC,iBAdmC,CAA9B;AAgBP,AAAO,IAAMC,6BAA2B,IAAIjd,MAAJ,CAAWgd,wBAAsB/c,IAAtB,CAA2B,GAA3B,CAAX,EAA4C,GAA5C,CAAjC;;;;;AAKP,AAAO,IAAMid,sBAAoB,IAAIld,MAAJ,CAAW,4CAAX,EAAyD,GAAzD,CAA1B;;;;AAIP,AAAO,IAAMmd,qBAAmB,IAAInd,MAAJ,CAAW,kBAAX,EAA+B,GAA/B,CAAzB;;;;AAIP,AAAO,IAAMod,sBAAoB,IAAIpd,MAAJ,CAAW,yBAAX,EAAsC,GAAtC,CAA1B;;8EAGP,AAAO,AAAMgJ;;AClCE,SAASqU,oBAAT,CAA8BlB,IAA9B,EAAoC;;MAE7Cc,2BAAyB9b,IAAzB,CAA8Bgb,IAA9B,CAAJ,EAAyC;WAChC,CAAC,EAAR;;;SAGK,CAAP;;;ACAF,SAASmB,SAAT,CAAiBC,KAAjB,EAAwB;UACZA,MAAMnb,IAAN,CAAW,OAAX,KAAuB,EAAjC,WAAuCmb,MAAMnb,IAAN,CAAW,IAAX,KAAoB,EAA3D;;;AAGF,AAAe,SAASgZ,gBAAT,CAAwBmC,KAAxB,EAA+B;;;;MAIxCtY,UAAUsY,MAAMrR,MAAN,EAAd;MACIsR,gBAAgB,KAApB;MACIC,gBAAgB,KAApB;MACInQ,QAAQ,CAAZ;;cAEWhO,MAAM,CAAN,EAAS,CAAT,CAAX,EAAwBuD,OAAxB,CAAgC,YAAM;QAChCoC,QAAQd,MAAR,KAAmB,CAAvB,EAA0B;;;;QAIpBuZ,aAAaJ,UAAQrY,OAAR,EAAiB,GAAjB,CAAnB;;;;QAII,CAACuY,aAAD,IAAkBxU,QAAQ7H,IAAR,CAAauc,UAAb,CAAtB,EAAgD;sBAC9B,IAAhB;eACS,EAAT;;;;;;QAME,CAACD,aAAD,IAAkB3U,kBAAkB3H,IAAlB,CAAu
Buc,UAAvB,CAAlB,IACET,2BAAyB9b,IAAzB,CAA8Buc,UAA9B,CADN,EACiD;UAC3C,CAAC9U,kBAAkBzH,IAAlB,CAAuBuc,UAAvB,CAAL,EAAyC;wBACvB,IAAhB;iBACS,EAAT;;;;cAIMzY,QAAQiH,MAAR,EAAV;GAzBF;;SA4BOoB,KAAP;;;AC/Ca,SAASqQ,aAAT,CAAuBC,QAAvB,EAAiC;;;MAG1CR,oBAAkBjc,IAAlB,CAAuByc,QAAvB,CAAJ,EAAsC;WAC7B,CAAC,GAAR;;;SAGK,CAAP;;;ACFa,SAASC,WAAT,CACb1B,IADa,EAEbE,UAFa,EAGbyB,OAHa,EAIbzc,SAJa,EAKbqS,QALa,EAMbqK,YANa,EAOb;;MAEIA,aAAa5a,IAAb,CAAkB;WAAOgZ,SAAS/a,GAAhB;GAAlB,MAA2C6T,SAA/C,EAA0D;WACjD,KAAP;;;;;MAKE,CAACkH,IAAD,IAASA,SAASE,UAAlB,IAAgCF,SAAS2B,OAA7C,EAAsD;WAC7C,KAAP;;;MAGMpe,QAZR,GAYqB2B,SAZrB,CAYQ3B,QAZR;;mBAa+B4B,IAAIC,KAAJ,CAAU4a,IAAV,CAb/B;MAakB6B,QAblB,cAaQte,QAbR;;;;;MAgBIse,aAAate,QAAjB,EAA2B;WAClB,KAAP;;;;;MAKIue,WAAW9B,KAAKvW,OAAL,CAAakY,OAAb,EAAsB,EAAtB,CAAjB;MACI,CAACjR,WAAS1L,IAAT,CAAc8c,QAAd,CAAL,EAA8B;WACrB,KAAP;;;;;MAKEhB,2BAAyB9b,IAAzB,CAA8BuS,QAA9B,CAAJ,EAA6C;WACpC,KAAP;;;;MAIEA,SAASvP,MAAT,GAAkB,EAAtB,EAA0B;WACjB,KAAP;;;SAGK,IAAP;;;ACpDa,SAAS+Z,YAAT,CAAsB/B,IAAtB,EAA4BgC,SAA5B,EAAuC;;;;;MAKhD,CAACA,UAAUhd,IAAV,CAAegb,IAAf,CAAL,EAA2B;WAClB,CAAC,EAAR;;;SAGK,CAAP;;;ACPa,SAASiC,iBAAT,CAA2BR,QAA3B,EAAqC;;MAE9CV,oBAAkB/b,IAAlB,CAAuByc,QAAvB,CAAJ,EAAsC;WAC7B,EAAP;;;SAGK,CAAP;;;ACHa,SAASS,aAAT,CAAuBT,QAAvB,EAAiC;;MAE1CT,mBAAiBhc,IAAjB,CAAsByc,QAAtB,CAAJ,EAAqC;;;;;QAK/BV,oBAAkB/b,IAAlB,CAAuByc,QAAvB,CAAJ,EAAsC;aAC7B,CAAC,EAAR;;;;SAIG,CAAP;;;ACIK,SAASU,aAAT,CAAuBR,OAAvB,EAAgC;SAC9B,IAAI9d,MAAJ,OAAe8d,OAAf,EAA0B,GAA1B,CAAP;;;AAGF,SAASR,OAAT,CAAiBC,KAAjB,EAAwB7J,QAAxB,EAAkC;UACtBA,YAAY6J,MAAMlR,IAAN,EAAtB,WAAsCkR,MAAMnb,IAAN,CAAW,OAAX,KAAuB,EAA7D,WAAmEmb,MAAMnb,IAAN,CAAW,IAAX,KAAoB,EAAvF;;;AAGF,AAAe,SAASmc,UAAT,OAOZ;MANDC,KAMC,QANDA,KAMC;MALDnC,UAKC,QALDA,UAKC;MAJDyB,OAIC,QAJDA,OAIC;MAHDzc,SAGC,QAHDA,SAGC;MAFDO,CAEC,QAFDA,CAEC;+BADDmc,YACC;MADDA,YACC,qCADc,EACd;;cACW1c,aAAaC,IAAIC,KAAJ,CAAU8a,UAAV,CAAzB;MACM8B,YAAYG,cAAcR,OAAd,CAAlB;MACMf,OAAO5H,YAAYvT,CAAZ,CAAb;;;;;;;;;MASM6c,cAAcD,MAAMja,MAAN,CAAa,UAACma,aAAD,EAAgBC,IAAhB,EAAyB;;;;QAIlDxC,OAAOnM,aAAa2O,KAAK/b,OAAL,CAAauZ,IAA1B
,CAAb;QACMoB,QAAQ3b,EAAE+c,IAAF,CAAd;QACMjL,WAAW6J,MAAMlR,IAAN,EAAjB;;QAEI,CAACwR,YAAY1B,IAAZ,EAAkBE,UAAlB,EAA8ByB,OAA9B,EAAuCzc,SAAvC,EAAkDqS,QAAlD,EAA4DqK,YAA5D,CAAL,EAAgF;aACvEW,aAAP;;;;QAIE,CAACA,cAAcvC,IAAd,CAAL,EAA0B;oBACVA,IAAd,IAAsB;eACb,CADa;0BAAA;;OAAtB;KADF,MAMO;oBACSA,IAAd,EAAoBzI,QAApB,GAAkCgL,cAAcvC,IAAd,EAAoBzI,QAAtD,SAAkEA,QAAlE;;;QAGIkL,eAAeF,cAAcvC,IAAd,CAArB;QACMyB,WAAWN,QAAQC,KAAR,EAAe7J,QAAf,CAAjB;QACM3D,UAAUF,eAAesM,IAAf,CAAhB;;QAEI7O,QAAQ4Q,aAAa/B,IAAb,EAAmBgC,SAAnB,CAAZ;aACSC,kBAAkBR,QAAlB,CAAT;aACSS,cAAcT,QAAd,CAAT;aACSD,cAAcC,QAAd,CAAT;aACSxC,iBAAemC,KAAf,CAAT;aACSF,qBAAqBlB,IAArB,CAAT;aACSW,gBAAgB/M,OAAhB,EAAyBgN,IAAzB,CAAT;aACSH,cAAclJ,QAAd,EAAwB3D,OAAxB,CAAT;aACSqM,gBAAgB9O,KAAhB,EAAuB+O,UAAvB,EAAmCF,IAAnC,CAAT;;iBAEa7O,KAAb,GAAqBA,KAArB;;WAEOoR,aAAP;GAvCkB,EAwCjB,EAxCiB,CAApB;;SA0CO,iBAAgBD,WAAhB,EAA6Bta,MAA7B,KAAwC,CAAxC,GAA4C,IAA5C,GAAmDsa,WAA1D;;;AClFF;;AAEA,IAAMI,8BAA8B;SAAA,yBACgB;QAAxCjd,CAAwC,QAAxCA,CAAwC;QAArCR,GAAqC,QAArCA,GAAqC;QAAhCC,SAAgC,QAAhCA,SAAgC;iCAArB0c,YAAqB;QAArBA,YAAqB,qCAAN,EAAM;;gBACpC1c,aAAaC,IAAIC,KAAJ,CAAUH,GAAV,CAAzB;;QAEMib,aAAarM,aAAa5O,GAAb,CAAnB;QACM0c,UAAUxN,eAAelP,GAAf,EAAoBC,SAApB,CAAhB;;QAEMmd,QAAQ5c,EAAE,SAAF,EAAawS,OAAb,EAAd;;QAEM0K,cAAcP,WAAW;kBAAA;4BAAA;sBAAA;0BAAA;UAAA;;KAAX,CAApB;;;QAUI,CAACO,WAAL,EAAkB,OAAO,IAAP;;;;QAIZC,UAAU,iBAAgBD,WAAhB,EAA6Bva,MAA7B,CAAoC,UAACC,GAAD,EAAMma,IAAN,EAAe;UAC3DK,aAAaF,YAAYH,IAAZ,CAAnB;aACOK,WAAW1R,KAAX,GAAmB9I,IAAI8I,KAAvB,GAA+B0R,UAA/B,GAA4Cxa,GAAnD;KAFc,EAGb,EAAE8I,OAAO,CAAC,GAAV,EAHa,CAAhB;;;;QAOIyR,QAAQzR,KAAR,IAAiB,EAArB,EAAyB;aAChByR,QAAQ5C,IAAf;;;WAGK,IAAP;;CAlCJ,CAsCA;;AChDO,IAAM8C,2BAA2B,CACtC,QADsC,CAAjC;;ACKP,SAASC,WAAT,CAAqB9d,GAArB,EAA0B;MAClBC,YAAYC,IAAIC,KAAJ,CAAUH,GAAV,CAAlB;MACQ1B,QAFgB,GAEH2B,SAFG,CAEhB3B,QAFgB;;SAGjBA,QAAP;;;AAGF,SAASiE,MAAT,CAAgBvC,GAAhB,EAAqB;SACZ;YAAA;YAEG8d,YAAY9d,GAAZ;GAFV;;;AAMF,IAAM+d,sBAAsB;SAAA,yBACK;QAArBvd,CAAqB,QAArBA,CAAqB;QAAlBR,GAAkB,QAAlBA,GAAkB;QAAbsY,SAAa,QAAbA,SAAa;;QACvB0F,aAAaxd,EAAE,qBAAF,CAAnB;QACIwd,WAAW
jb,MAAX,KAAsB,CAA1B,EAA6B;UACrBgY,OAAOiD,WAAWhd,IAAX,CAAgB,MAAhB,CAAb;UACI+Z,IAAJ,EAAU;eACDxY,OAAOwY,IAAP,CAAP;;;;QAIEkD,UAAUzL,mBAAgBhS,CAAhB,EAAmBqd,wBAAnB,EAA6CvF,SAA7C,CAAhB;QACI2F,OAAJ,EAAa;aACJ1b,OAAO0b,OAAP,CAAP;;;WAGK1b,OAAOvC,GAAP,CAAP;;CAfJ,CAoBA;;ACtCO,IAAMke,yBAAyB,CACpC,gBADoC,EAEpC,qBAFoC,CAA/B;;ACSA,SAAS/b,OAAT,CAAeM,OAAf,EAAwBjC,CAAxB,EAA4C;MAAjB2d,SAAiB,uEAAL,GAAK;;YACvC1b,QAAQ+B,OAAR,CAAgB,UAAhB,EAA4B,GAA5B,EAAiC0G,IAAjC,EAAV;SACOkT,UAAU3b,OAAV,EAAmB0b,SAAnB,EAA8B,EAAEE,SAAS,UAAX,EAA9B,CAAP;;;AAGF,IAAMC,0BAA0B;SAAA,yBACK;QAAzB9d,CAAyB,QAAzBA,CAAyB;QAAtBiC,OAAsB,QAAtBA,OAAsB;QAAb6V,SAAa,QAAbA,SAAa;;QAC3BjD,UAAU7C,mBAAgBhS,CAAhB,EAAmB0d,sBAAnB,EAA2C5F,SAA3C,CAAhB;QACIjD,OAAJ,EAAa;aACJlT,QAAM+Q,UAAUmC,OAAV,EAAmB7U,CAAnB,CAAN,CAAP;;;QAGI2d,YAAY,GAAlB;QACMI,eAAe9b,QAAQsK,KAAR,CAAc,CAAd,EAAiBoR,YAAY,CAA7B,CAArB;WACOhc,QAAM3B,EAAE+d,YAAF,EAAgBtT,IAAhB,EAAN,EAA8BzK,CAA9B,EAAiC2d,SAAjC,CAAP;;CATJ,CAaA;;ACvBA,IAAMK,4BAA4B;SAAA,yBACX;QAAX/b,OAAW,QAAXA,OAAW;;QACbjC,IAAImC,QAAQC,IAAR,CAAaH,OAAb,CAAV;;QAEMwI,OAAO8C,gBAAgBvN,EAAE,KAAF,EAASsQ,KAAT,GAAiB7F,IAAjB,EAAhB,CAAb;WACOA,KAAK7F,KAAL,CAAW,IAAX,EAAiBrC,MAAxB;;CALJ,CASA;;ACCA,IAAM0b,mBAAmB;;UAEf,GAFe;SAGhBpG,sBAAsBqG,OAHN;kBAIPxF,8BAA8BwF,OAJvB;UAKf9F,uBAAuB8F,OALR;WAMd9G,wBAAwB8G,OAAxB,CAAgCC,IAAhC,CAAqC/G,uBAArC,CANc;kBAOP6C,6BAA6BiE,OAPtB;OAQlBtF,oBAAoBsF,OARF;iBASRjB,4BAA4BiB,OATpB;kBAUPX,oBAAoBW,OAVb;WAWdJ,wBAAwBI,OAXV;cAYXF,0BAA0BE,OAZf;aAaZ;QAAG/M,KAAH,QAAGA,KAAH;WAAeiN,gBAAgBC,YAAhB,CAA6BlN,KAA7B,CAAf;GAbY;;SAAA,mBAef3S,OAfe,EAeN;QACPyU,IADO,GACEzU,OADF,CACPyU,IADO;;;QAGXA,IAAJ,EAAU;UACFjT,IAAImC,QAAQC,IAAR,CAAa6Q,IAAb,CAAV;cACQjT,CAAR,GAAYA,CAAZ;;;QAGImR,QAAQ,KAAKA,KAAL,CAAW3S,OAAX,CAAd;QACM8f,iBAAiB,KAAKA,cAAL,CAAoB9f,OAApB,CAAvB;QACM+V,SAAS,KAAKA,MAAL,CAAY/V,OAAZ,CAAf;QACMyD,UAAU,KAAKA,OAAL,cAAkBzD,OAAlB,IAA2B2S,YAA3B,IAAhB;QACMoN,iBAAiB,KAAKA,cAAL,cAAyB/f,OAAzB,IAAkCyD,gBAAlC,IAAvB;QACM2S,MAAM,KAAKA,GAAL,cAAcpW,OAAd,IAAuByD,gBAAvB,IAAZ;QACMuc,gBAAgB,KAAKA,aAAL,CAAmBhgB,OAAnB,CAAtB;QACMqW,UAAU,KAAKA
,OAAL,cAAkBrW,OAAlB,IAA2ByD,gBAA3B,IAAhB;QACMwc,aAAa,KAAKA,UAAL,cAAqBjgB,OAArB,IAA8ByD,gBAA9B,IAAnB;QACMyc,YAAY,KAAKA,SAAL,CAAe,EAAEvN,YAAF,EAAf,CAAlB;;0BACwB,KAAKwN,cAAL,CAAoBngB,OAApB,CAlBT;QAkBPgB,GAlBO,mBAkBPA,GAlBO;QAkBFqD,MAlBE,mBAkBFA,MAlBE;;WAoBR;kBAAA;oBAAA;sBAGWyb,kBAAkB,IAH7B;cAAA;oCAAA;sBAAA;kCAAA;cAAA;oBAAA;sBAAA;4BAAA;;KAAP;;CAnCJ,CAoDA;;AC7De,SAASM,YAAT,CAAsBpf,GAAtB,EAA2BC,SAA3B,EAAsC;cACvCA,aAAaC,IAAIC,KAAJ,CAAUH,GAAV,CAAzB;mBACqBC,SAF8B;MAE3C3B,QAF2C,cAE3CA,QAF2C;;MAG7C+gB,aAAa/gB,SAAS8G,KAAT,CAAe,GAAf,EAAoB2H,KAApB,CAA0B,CAAC,CAA3B,EAA8BlO,IAA9B,CAAmC,GAAnC,CAAnB;;SAEOygB,WAAWhhB,QAAX,KAAwBghB,WAAWD,UAAX,CAAxB,IAAkDZ,gBAAzD;;;ACNF;AACA,AAAO,SAASc,gBAAT,CAA0BrN,QAA1B,EAAoC1R,CAApC,QAAkD;MAAT2B,KAAS,QAATA,KAAS;;MACnD,CAACA,KAAL,EAAY,OAAO+P,QAAP;;IAEV/P,MAAMtD,IAAN,CAAW,GAAX,CAAF,EAAmBqT,QAAnB,EAA6BhQ,MAA7B;;SAEOgQ,QAAP;;;;AAIF,AAAO,SAASsN,iBAAT,CAA2BtN,QAA3B,EAAqC1R,CAArC,SAAwD;MAAdif,UAAc,SAAdA,UAAc;;MACzD,CAACA,UAAL,EAAiB,OAAOvN,QAAP;;mBAEDuN,UAAhB,EAA4Bhe,OAA5B,CAAoC,UAACyE,GAAD,EAAS;QACrCwZ,WAAWlf,EAAE0F,GAAF,EAAOgM,QAAP,CAAjB;QACMnR,QAAQ0e,WAAWvZ,GAAX,CAAd;;;QAGI,OAAOnF,KAAP,KAAiB,QAArB,EAA+B;eACpBJ,IAAT,CAAc,UAACgB,KAAD,EAAQd,IAAR,EAAiB;sBACfL,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0Bif,WAAWvZ,GAAX,CAA1B;OADF;KADF,MAIO,IAAI,OAAOnF,KAAP,KAAiB,UAArB,EAAiC;;eAE7BJ,IAAT,CAAc,UAACgB,KAAD,EAAQd,IAAR,EAAiB;YACvB0B,SAASxB,MAAMP,EAAEK,IAAF,CAAN,EAAeL,CAAf,CAAf;;YAEI,OAAO+B,MAAP,KAAkB,QAAtB,EAAgC;wBAChB/B,EAAEK,IAAF,CAAd,EAAuBL,CAAvB,EAA0B+B,MAA1B;;OAJJ;;GAXJ;;SAqBO2P,QAAP;;;AAGF,SAASyN,oBAAT,CAA8Bnf,CAA9B,EAAiC+S,SAAjC,EAA4C;SACnCA,UAAUxR,IAAV,CAAe,UAAC2E,QAAD,EAAc;QAC9BkZ,MAAMC,OAAN,CAAcnZ,QAAd,CAAJ,EAA6B;qCACTA,QADS;UACpBoZ,CADoB;UACjB9e,IADiB;;aAEpBR,EAAEsf,CAAF,EAAK/c,MAAL,KAAgB,CAAhB,IAAqBvC,EAAEsf,CAAF,EAAK9e,IAAL,CAAUA,IAAV,CAArB,IAAwCR,EAAEsf,CAAF,EAAK9e,IAAL,CAAUA,IAAV,EAAgBkK,IAAhB,OAA2B,EAA1E;;;WAGK1K,EAAEkG,QAAF,EAAY3D,MAAZ,KAAuB,CAAvB,IAA4BvC,EAAEkG,QAAF,EAAYuE,IAAZ,GAAmBC,IAAnB,OAA8B,EAAjE;GANK,CAAP;;;AAUF,AAAO,SAAS6U,MAAT,CAAgBrI,IAAhB,EAAsB;MA
CnBlX,CADmB,GAC8BkX,IAD9B,CACnBlX,CADmB;MAChBoB,IADgB,GAC8B8V,IAD9B,CAChB9V,IADgB;MACVoe,cADU,GAC8BtI,IAD9B,CACVsI,cADU;0BAC8BtI,IAD9B,CACMuI,WADN;MACMA,WADN,qCACoB,KADpB;;;MAGvB,CAACD,cAAL,EAAqB,OAAO,IAAP;;;;MAIjB,OAAOA,cAAP,KAA0B,QAA9B,EAAwC,OAAOA,cAAP;;MAEhCzM,SATmB,GASkByM,cATlB,CASnBzM,SATmB;8BASkByM,cATlB,CASR/J,cATQ;MASRA,cATQ,yCASS,IATT;;;MAWrBiK,mBAAmBP,qBAAqBnf,CAArB,EAAwB+S,SAAxB,CAAzB;;MAEI,CAAC2M,gBAAL,EAAuB,OAAO,IAAP;;;;;;;;MAQnBD,WAAJ,EAAiB;QACX/N,WAAW1R,EAAE0f,gBAAF,CAAf;;;aAGSC,IAAT,CAAc3f,EAAE,aAAF,CAAd;eACW0R,SAASpH,MAAT,EAAX;;eAEW0U,kBAAkBtN,QAAlB,EAA4B1R,CAA5B,EAA+Bwf,cAA/B,CAAX;eACWT,iBAAiBrN,QAAjB,EAA2B1R,CAA3B,EAA8Bwf,cAA9B,CAAX;;eAEW1I,SAAS1V,IAAT,EAAesQ,QAAf,eAA8BwF,IAA9B,IAAoCzB,8BAApC,IAAX;;WAEOzV,EAAEiT,IAAF,CAAOvB,QAAP,CAAP;;;MAGE3P,eAAJ;;;;MAIIqd,MAAMC,OAAN,CAAcK,gBAAd,CAAJ,EAAqC;2CACVA,gBADU;QAC5BxZ,QAD4B;QAClB1F,IADkB;;aAE1BR,EAAEkG,QAAF,EAAY1F,IAAZ,CAAiBA,IAAjB,EAAuBkK,IAAvB,EAAT;GAFF,MAGO;aACI1K,EAAE0f,gBAAF,EAAoBjV,IAApB,GAA2BC,IAA3B,EAAT;;;;;MAKE+K,cAAJ,EAAoB;WACXqB,SAAS1V,IAAT,EAAeW,MAAf,EAAuBmV,IAAvB,CAAP;;;SAGKnV,MAAP;;;AAGF,SAAS6d,aAAT,CAAuB1I,IAAvB,EAA6B;MACnB9V,IADmB,GACkB8V,IADlB,CACnB9V,IADmB;MACbqB,SADa,GACkByU,IADlB,CACbzU,SADa;uBACkByU,IADlB,CACF2I,QADE;MACFA,QADE,kCACS,IADT;;;MAGrB9d,SAASwd,oBAAYrI,IAAZ,IAAkBsI,gBAAgB/c,UAAUrB,IAAV,CAAlC,IAAf;;;MAGIW,MAAJ,EAAY;WACHA,MAAP;;;;;MAKE8d,QAAJ,EAAc,OAAO5B,iBAAiB7c,IAAjB,EAAuB8V,IAAvB,CAAP;;SAEP,IAAP;;;AAGF,IAAM4I,gBAAgB;SAAA,qBACwB;QAApCrd,SAAoC,uEAAxBwb,gBAAwB;QAAN/G,IAAM;gBACFA,IADE;QAClC6I,WADkC,SAClCA,WADkC;QACrBC,cADqB,SACrBA,cADqB;;;QAGtCvd,UAAUI,MAAV,KAAqB,GAAzB,EAA8B,OAAOJ,UAAUyb,OAAV,CAAkBhH,IAAlB,CAAP;;wBAGzBA,IADL;;;;QAKI6I,WAAJ,EAAiB;UACT9d,WAAU2d,2BACX1I,IADW,IACL9V,MAAM,SADD,EACYqe,aAAa,IADzB,EAC+BtO,OAAO6O;SADtD;aAGO;;OAAP;;QAII7O,QAAQyO,2BAAmB1I,IAAnB,IAAyB9V,MAAM,OAA/B,IAAd;QACMkd,iBAAiBsB,2BAAmB1I,IAAnB,IAAyB9V,MAAM,gBAA/B,IAAvB;QACMmT,SAASqL,2BAAmB1I,IAAnB,IAAyB9V,MAAM,QAA/B,IAAf;QACMod,gBAAgBoB,2BAAmB1I,IAAnB,IAAyB9V,MAAM,eAA/B,IAAtB;QACMa,UAAU2d,2BACX1I,
IADW,IACL9V,MAAM,SADD,EACYqe,aAAa,IADzB,EAC+BtO;OAD/C;QAGMoN,iBAAiBqB,2BAAmB1I,IAAnB,IAAyB9V,MAAM,gBAA/B,EAAiDa,gBAAjD,IAAvB;QACM4S,UAAU+K,2BAAmB1I,IAAnB,IAAyB9V,MAAM,SAA/B,EAA0Ca,gBAA1C,IAAhB;QACM2S,MAAMgL,2BAAmB1I,IAAnB,IAAyB9V,MAAM,KAA/B,EAAsCa,gBAAtC,EAA+C4S,gBAA/C,IAAZ;QACM4J,aAAamB,2BAAmB1I,IAAnB,IAAyB9V,MAAM,YAA/B,EAA6Ca,gBAA7C,IAAnB;QACMyc,YAAYkB,2BAAmB1I,IAAnB,IAAyB9V,MAAM,WAA/B,EAA4C+P,YAA5C,IAAlB;;gBAEEyO,2BAAmB1I,IAAnB,IAAyB9V,MAAM,gBAA/B,QAAsD,EAAE5B,KAAK,IAAP,EAAaqD,QAAQ,IAArB,EA/Bd;QA8BlCrD,GA9BkC,SA8BlCA,GA9BkC;QA8B7BqD,MA9B6B,SA8B7BA,MA9B6B;;WAiCnC;kBAAA;sBAAA;oBAAA;oCAAA;oCAAA;cAAA;kCAAA;cAAA;oBAAA;sBAAA;4BAAA;;KAAP;;CAlCJ,CAmDA;;AC3KA;wDAAe;QAEX2b,aAFW,SAEXA,aAFW;QAGXvL,IAHW,SAGXA,IAHW;QAIXjT,CAJW,SAIXA,CAJW;QAKX8X,SALW,SAKXA,SALW;QAMX/V,MANW,SAMXA,MANW;QAOXke,SAPW,SAOXA,SAPW;QAQX9O,KARW,SAQXA,KARW;QASX3R,GATW,SASXA,GATW;;;;;;;iBAAA,GAaD,CAbC;wBAAA,GAcQ,CAAC4O,aAAa5O,GAAb,CAAD,CAdR;;;;;;kBAkBNgf,iBAAiB0B,QAAQ,EAlBnB;;;;;qBAmBF,CAAT;;mBACUte,SAASue,MAAT,CAAgB3B,aAAhB,CApBC;;;aAAA;;mBAqBJxe,EAAEiT,IAAF,EAAP;;yBArBW,GAuBW;mBACfuL,aADe;wBAAA;kBAAA;kCAAA;2BAKP,IALO;8BAMJrN,KANI;;aAvBX;0BAAA,GAiCY2O,cAAc5B,OAAd,CAAsB+B,SAAtB,EAAiCG,aAAjC,CAjCZ;;;yBAmCEjR,IAAb,CAAkBqP,aAAlB;kCAEKzc,MADL;sCAGMA,OAAOE,OADX,yCAGaie,KAHb,uBAIIG,eAAepe,OAJnB;;;4BAQcoe,eAAe7B,aAA/B;;;;;sBA9CW,GAiDMP,iBAAiBQ,UAAjB,CAA4B,EAAExc,mBAAiBF,OAAOE,OAAxB,WAAF,EAA5B,CAjDN;0DAmDRF,MAnDQ;2BAoDEme,KApDF;8BAqDKA,KArDL;;;;;;;;;;GAAf;;WAA8BI,eAA9B;;;;SAA8BA,eAA9B;;;ACMA,IAAMC,UAAU;OAAA,iBACF/gB,GADE,EACGyT,IADH,EACoB;;;QAAXiE,IAAW,uEAAJ,EAAI;;;;;;;;oCAI5BA,IAJ4B,CAE9BsJ,aAF8B,EAE9BA,aAF8B,uCAEd,IAFc,yCAI5BtJ,IAJ4B,CAG9B2I,QAH8B,EAG9BA,QAH8B,kCAGnB,IAHmB;uBAAA,GAMdngB,IAAIC,KAAJ,CAAUH,GAAV,CANc;;kBAQ3B3B,YAAY4B,SAAZ,CAR2B;;;;;+CASvB1B,OAAO8B,MATgB;;;uBAAA,GAYd+e,aAAapf,GAAb,EAAkBC,SAAlB,CAZc;;;;qBAehBmC,SAASue,MAAT,CAAgB3gB,GAAhB,EAAqByT,IAArB,EAA2BxT,SAA3B,CAfgB;;;eAAA;;mBAkB5BO,EAAEb,KAlB0B;;;;;+CAmBvBa,CAnBuB;;;;qBAsBzBA,EAAEiT,IAAF,EAAP;;;;uBAtBgC,GA0BdjT,EAAE,MAAF,EAAUiG,GAAV,CAAc,UAAC7F,
CAAD,EAAIC,IAAJ;uBAAaL,EAAEK,IAAF,EAAQG,IAAR,CAAa,MAAb,CAAb;eAAd,EAAiDgS,OAAjD,EA1Bc;oBAAA,GA4BnBsN,cAAc5B,OAAd,CAAsB+B,SAAtB,EAAiC,EAAEzgB,QAAF,EAAOyT,UAAP,EAAajT,IAAb,EAAgB8X,oBAAhB,EAA2BrY,oBAA3B,EAAsCogB,kBAAtC,EAAjC,CA5BmB;wBA6BC9d,MA7BD,EA6BxBoP,KA7BwB,WA6BxBA,KA7BwB,EA6BjBqN,aA7BiB,WA6BjBA,aA7BiB;;;;oBAgC5BgC,iBAAiBhC,aAhCW;;;;;;qBAiCf8B,gBACb;oCAAA;4CAAA;0BAAA;oBAAA;oCAAA;8BAAA;4BAAA;;eADa,CAjCe;;;oBAAA;;;;;oCA+CzBve,MADL;6BAEe,CAFf;gCAGkB;;;;+CAIbA,MArDyB;;;;;;;;;GADpB;;;;;eAAA,yBA2DMvC,GA3DN,EA2DW;;;;;;;;;qBACVoC,SAASue,MAAT,CAAgB3gB,GAAhB,CADU;;;;;;;;;;;;;CA3D3B,CAiEA;;"}
\ No newline at end of file
+{"version":3,"file":null,"sources":["../src/utils/range.js","../src/utils/validate-url.js","../src/utils/errors.js","../src/resource/utils/constants.js","../src/resource/utils/fetch-resource.js","../src/resource/utils/dom/normalize-meta-tags.js","../src/utils/dom/constants.js","../src/utils/dom/strip-unlikely-candidates.js","../src/utils/dom/brs-to-ps.js","../src/utils/dom/paragraphize.js","../src/utils/dom/convert-to-paragraphs.js","../src/utils/dom/convert-node-to.js","../src/utils/dom/clean-images.js","../src/utils/dom/mark-to-keep.js","../src/utils/dom/strip-junk-tags.js","../src/utils/dom/clean-h-ones.js","../src/utils/dom/clean-attributes.js","../src/utils/dom/remove-empty.js","../src/extractors/generic/content/scoring/constants.js","../src/extractors/generic/content/scoring/get-weight.js","../src/extractors/generic/content/scoring/get-score.js","../src/extractors/generic/content/scoring/score-commas.js","../src/extractors/generic/content/scoring/score-length.js","../src/extractors/generic/content/scoring/score-paragraph.js","../src/extractors/generic/content/scoring/set-score.js","../src/extractors/generic/content/scoring/add-score.js","../src/extractors/generic/content/scoring/add-to-parent.js","../src/extractors/generic/content/scoring/get-or-init-score.js","../src/extractors/generic/content/scoring/score-node.js","../src/extractors/generic/content/scoring/score-content.js","../src/utils/text/normalize-spaces.js","../src/utils/text/extract-from-url.js","../src/utils/text/constants.js","../src/utils/text/page-num-from-url.js","../src/utils/text/remove-anchor.js","../src/utils/text/article-base-url.js","../src/utils/text/has-sentence-end.js","../src/utils/text/excerpt-content.js","../src/extractors/generic/content/scoring/merge-siblings.js","../src/extractors/generic/content/scoring/find-top-candidate.js","../src/extractors/generic/content/scoring/index.js","../src/utils/dom/clean-tags.js","../src/utils/dom/clean-headers.js","../src/utils/dom/rewrite-top-lev
el.js","../src/utils/dom/make-links-absolute.js","../src/utils/dom/link-density.js","../src/utils/dom/extract-from-meta.js","../src/utils/dom/extract-from-selectors.js","../src/utils/dom/strip-tags.js","../src/utils/dom/within-comment.js","../src/utils/dom/node-is-sufficient.js","../src/utils/dom/is-wordpress.js","../src/utils/dom/get-attrs.js","../src/utils/dom/set-attr.js","../src/utils/dom/set-attrs.js","../src/utils/dom/index.js","../src/resource/utils/dom/constants.js","../src/resource/utils/dom/convert-lazy-loaded-images.js","../src/resource/utils/dom/clean.js","../src/resource/index.js","../src/utils/merge-supported-domains.js","../src/extractors/custom/blogspot.com/index.js","../src/extractors/custom/nymag.com/index.js","../src/extractors/custom/wikipedia.org/index.js","../src/extractors/custom/twitter.com/index.js","../src/extractors/custom/www.nytimes.com/index.js","../src/extractors/custom/www.theatlantic.com/index.js","../src/extractors/custom/www.newyorker.com/index.js","../src/extractors/custom/www.wired.com/index.js","../src/extractors/custom/www.msn.com/index.js","../src/extractors/custom/www.yahoo.com/index.js","../src/extractors/custom/www.buzzfeed.com/index.js","../src/extractors/custom/fandom.wikia.com/index.js","../src/extractors/custom/www.littlethings.com/index.js","../src/extractors/custom/www.politico.com/index.js","../src/extractors/custom/deadspin.com/index.js","../src/extractors/custom/www.broadwayworld.com/index.js","../src/extractors/custom/www.apartmenttherapy.com/index.js","../src/extractors/custom/medium.com/index.js","../src/extractors/all.js","../src/cleaners/constants.js","../src/cleaners/author.js","../src/cleaners/lead-image-url.js","../src/cleaners/dek.js","../src/cleaners/date-published.js","../src/cleaners/content.js","../src/cleaners/title.js","../src/cleaners/resolve-split-title.js","../src/cleaners/index.js","../src/extractors/generic/content/extract-best-node.js","../src/extractors/generic/content/extractor.js","../src/ex
tractors/generic/title/constants.js","../src/extractors/generic/title/extractor.js","../src/extractors/generic/author/constants.js","../src/extractors/generic/author/extractor.js","../src/extractors/generic/date-published/constants.js","../src/extractors/generic/date-published/extractor.js","../src/extractors/generic/dek/extractor.js","../src/extractors/generic/lead-image-url/constants.js","../src/extractors/generic/lead-image-url/score-image.js","../src/extractors/generic/lead-image-url/extractor.js","../src/extractors/generic/next-page-url/scoring/utils/score-similarity.js","../src/extractors/generic/next-page-url/scoring/utils/score-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-page-in-link.js","../src/extractors/generic/next-page-url/scoring/constants.js","../src/extractors/generic/next-page-url/scoring/utils/score-extraneous-links.js","../src/extractors/generic/next-page-url/scoring/utils/score-by-parents.js","../src/extractors/generic/next-page-url/scoring/utils/score-prev-link.js","../src/extractors/generic/next-page-url/scoring/utils/should-score.js","../src/extractors/generic/next-page-url/scoring/utils/score-base-url.js","../src/extractors/generic/next-page-url/scoring/utils/score-next-link-text.js","../src/extractors/generic/next-page-url/scoring/utils/score-cap-links.js","../src/extractors/generic/next-page-url/scoring/score-links.js","../src/extractors/generic/next-page-url/extractor.js","../src/extractors/generic/url/constants.js","../src/extractors/generic/url/extractor.js","../src/extractors/generic/excerpt/constants.js","../src/extractors/generic/excerpt/extractor.js","../src/extractors/generic/word-count/extractor.js","../src/extractors/generic/index.js","../src/extractors/get-extractor.js","../src/extractors/root-extractor.js","../src/extractors/collect-all-pages.js","../src/mercury.js"],"sourcesContent":["export default function* range(start = 1, end = 1) {\n while (start <= end) {\n yield start += 1;\n }\n}\n","// 
extremely simple url validation as a first step\nexport default function validateUrl({ hostname }) {\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n","const Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.',\n },\n};\n\nexport default Errors;\n","import cheerio from 'cheerio';\n\n// Browser does not like us setting user agent\nexport const REQUEST_HEADERS = cheerio.browser ? {} : {\n 'User-Agent': 'Mercury - https://mercury.postlight.com/web-parser/',\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nexport const FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nconst BAD_CONTENT_TYPES = [\n 'audio/mpeg',\n 'image/gif',\n 'image/jpeg',\n 'image/jpg',\n];\n\nexport const BAD_CONTENT_TYPES_RE = new RegExp(`^(${BAD_CONTENT_TYPES.join('|')})$`, 'i');\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. 
Defaults to 5 MB.\nexport const MAX_CONTENT_LENGTH = 5242880;\n\n// Turn the global proxy on or off\n// Proxying is not currently enabled in Python source\n// so not implementing logic in port.\nexport const PROXY_DOMAINS = false;\nexport const REQUESTS_PROXIES = {\n http: 'http://38.98.105.139:33333',\n https: 'http://38.98.105.139:33333',\n};\n\nexport const DOMAINS_TO_PROXY = [\n 'nih.gov',\n 'gutenberg.org',\n];\n","import URL from 'url';\nimport request from 'request';\nimport { Errors } from 'utils';\n\nimport {\n REQUEST_HEADERS,\n FETCH_TIMEOUT,\n BAD_CONTENT_TYPES_RE,\n MAX_CONTENT_LENGTH,\n} from './constants';\n\nfunction get(options) {\n return new Promise((resolve, reject) => {\n request(options, (err, response, body) => {\n if (err) {\n reject(err);\n } else {\n resolve({ body, response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. Validation here means that we haven't found reason to bail from\n// further processing of this url.\n\nexport function validateResponse(response, parseNon2xx = false) {\n // Check if we got a valid status code\n // This isn't great, but I'm requiring a statusMessage to be set\n // before short circuiting b/c nock doesn't set it in tests\n // statusMessage only not set in nock response, in which case\n // I check statusCode, which is currently only 200 for OK responses\n // in tests\n if (\n (response.statusMessage && response.statusMessage !== 'OK') ||\n response.statusCode !== 200\n ) {\n if (!response.statusCode) {\n throw new Error(\n `Unable to fetch content. 
Original exception was ${response.error}`\n );\n } else if (!parseNon2xx) {\n throw new Error(\n `Resource returned a response status code of ${response.statusCode} and resource was instructed to reject non-2xx level status codes.`\n );\n }\n }\n\n const {\n 'content-type': contentType,\n 'content-length': contentLength,\n } = response.headers;\n\n // Check that the content is not in BAD_CONTENT_TYPES\n if (BAD_CONTENT_TYPES_RE.test(contentType)) {\n throw new Error(\n `Content-type for this resource was ${contentType} and is not allowed.`\n );\n }\n\n // Check that the content length is below maximum\n if (contentLength > MAX_CONTENT_LENGTH) {\n throw new Error(\n `Content for this resource was too large. Maximum content length is ${MAX_CONTENT_LENGTH}.`\n );\n }\n\n return true;\n}\n\n// Grabs the last two pieces of the URL and joins them back together\n// This is to get the 'livejournal.com' from 'erotictrains.livejournal.com'\nexport function baseDomain({ host }) {\n return host.split('.').slice(-2).join('.');\n}\n\n// Set our response attribute to the result of fetching our URL.\n// TODO: This should gracefully handle timeouts and raise the\n// proper exceptions on the many failure cases of HTTP.\n// TODO: Ensure we are not fetching something enormous. 
Always return\n// unicode content for HTML, with charset conversion.\n\nexport default async function fetchResource(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(encodeURI(url));\n\n const options = {\n url: parsedUrl.href,\n headers: { ...REQUEST_HEADERS },\n timeout: FETCH_TIMEOUT,\n // Don't set encoding; fixes issues\n // w/gzipped responses\n encoding: null,\n // Accept cookies\n jar: true,\n // Accept and decode gzip\n gzip: true,\n // Follow any redirect\n followAllRedirects: true,\n };\n\n const { response, body } = await get(options);\n\n try {\n validateResponse(response);\n return {\n body,\n response,\n };\n } catch (e) {\n return Errors.badUrl;\n }\n}\n","function convertMetaProp($, from, to) {\n $(`meta[${from}]`).each((_, node) => {\n const $node = $(node);\n\n const value = $node.attr(from);\n $node.attr(to, value);\n $node.removeAttr(from);\n });\n\n return $;\n}\n\n// For ease of use in extracting from meta tags,\n// replace the \"content\" attribute on meta tags with the\n// \"value\" attribute.\n//\n// In addition, normalize 'property' attributes to 'name' for ease of\n// querying later. 
See, e.g., og or twitter meta tags.\n\nexport default function normalizeMetaTags($) {\n $ = convertMetaProp($, 'content', 'value');\n $ = convertMetaProp($, 'property', 'name');\n return $;\n}\n","// Spacer images to be removed\nexport const SPACER_RE = new RegExp('trans|transparent|spacer|blank', 'i');\n\n// The class we will use to mark elements we want to keep\n// but would normally remove\nexport const KEEP_CLASS = 'mercury-parser-keep';\n\nexport const KEEP_SELECTORS = [\n 'iframe[src^=\"https://www.youtube.com\"]',\n 'iframe[src^=\"http://www.youtube.com\"]',\n 'iframe[src^=\"https://player.vimeo\"]',\n 'iframe[src^=\"http://player.vimeo\"]',\n];\n\n// A list of tags to strip from the output if we encounter them.\nexport const STRIP_OUTPUT_TAGS = [\n 'title',\n 'script',\n 'noscript',\n 'link',\n 'style',\n 'hr',\n 'embed',\n 'iframe',\n 'object',\n];\n\n// cleanAttributes\nexport const REMOVE_ATTRS = ['style', 'align'];\nexport const REMOVE_ATTR_SELECTORS = REMOVE_ATTRS.map(selector => `[${selector}]`);\nexport const REMOVE_ATTR_LIST = REMOVE_ATTRS.join(',');\nexport const WHITELIST_ATTRS = ['src', 'srcset', 'href', 'class', 'id', 'alt'];\nexport const WHITELIST_ATTRS_RE = new RegExp(`^(${WHITELIST_ATTRS.join('|')})$`, 'i');\n\n// removeEmpty\nexport const REMOVE_EMPTY_TAGS = ['p'];\nexport const REMOVE_EMPTY_SELECTORS = REMOVE_EMPTY_TAGS.map(tag => `${tag}:empty`).join(',');\n\n// cleanTags\nexport const CLEAN_CONDITIONALLY_TAGS = ['ul', 'ol', 'table', 'div', 'button', 'form'].join(',');\n\n// cleanHeaders\nconst HEADER_TAGS = ['h2', 'h3', 'h4', 'h5', 'h6'];\nexport const HEADER_TAG_LIST = HEADER_TAGS.join(',');\n\n// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n // 'form', // This is too generic, has too many false positives\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'outbrain',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'taboola',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// XPath to try to determine if a page is wordpress. 
Not always successful.\nexport const IS_WP_SELECTOR = 'meta[name=generator][value^=WordPress]';\n\n// Match a digit. Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\n// export const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^\\|]|$)|»([^\\|]|$))', 'i');\nexport const NEXT_LINK_TEXT_RE = /(next|weiter|continue|>([^|]|$)|»([^|]|$))/i;\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. 
Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n CANDIDATES_WHITELIST,\n CANDIDATES_BLACKLIST,\n} from './constants';\n\nexport default function stripUnlikelyCandidates($) {\n // Loop through the provided document and remove any non-link nodes\n // that are unlikely candidates for article content.\n //\n // Links are ignored because there are very often links to content\n // that are identified as non-body-content, but may 
be inside\n // article-like content.\n //\n // :param $: a cheerio object to strip nodes from\n // :return $: the cleaned cheerio object\n $('*').not('a').each((index, node) => {\n const $node = $(node);\n const classes = $node.attr('class');\n const id = $node.attr('id');\n if (!id && !classes) return;\n\n const classAndId = `${classes || ''} ${id || ''}`;\n if (CANDIDATES_WHITELIST.test(classAndId)) {\n return;\n } else if (CANDIDATES_BLACKLIST.test(classAndId)) {\n $node.remove();\n }\n });\n\n return $;\n}\n","import { paragraphize } from './index';\n\n// ## NOTES:\n// Another good candidate for refactoring/optimizing.\n// Very imperative code, I don't love it. - AP\n\n// Given cheerio object, convert consecutive tags into\n// tags instead.\n//\n// :param $: A cheerio object\n\nexport default function brsToPs($) {\n let collapsing = false;\n $('br').each((index, element) => {\n const $element = $(element);\n const nextElement = $element.next().get(0);\n\n if (nextElement && nextElement.tagName.toLowerCase() === 'br') {\n collapsing = true;\n $element.remove();\n } else if (collapsing) {\n collapsing = false;\n // $(element).replaceWith('')\n paragraphize(element, $, true);\n }\n });\n\n return $;\n}\n","import { BLOCK_LEVEL_TAGS_RE } from './constants';\n\n// Given a node, turn it into a P if it is not already a P, and\n// make sure it conforms to the constraints of a P tag (I.E. 
does\n// not contain any other block tags.)\n//\n// If the node is a , it treats the following inline siblings\n// as if they were its children.\n//\n// :param node: The node to paragraphize; this is a raw node\n// :param $: The cheerio object to handle dom manipulation\n// :param br: Whether or not the passed node is a br\n\nexport default function paragraphize(node, $, br = false) {\n const $node = $(node);\n\n if (br) {\n let sibling = node.nextSibling;\n const p = $('');\n\n // while the next node is text or not a block level element\n // append it to a new p node\n while (sibling && !(sibling.tagName && BLOCK_LEVEL_TAGS_RE.test(sibling.tagName))) {\n const nextSibling = sibling.nextSibling;\n $(sibling).appendTo(p);\n sibling = nextSibling;\n }\n\n $node.replaceWith(p);\n $node.remove();\n return $;\n }\n\n return $;\n}\n","import { brsToPs, convertNodeTo } from 'utils/dom';\n\nimport { DIV_TO_P_BLOCK_TAGS } from './constants';\n\nfunction convertDivs($) {\n $('div').each((index, div) => {\n const $div = $(div);\n const convertable = $div.children(DIV_TO_P_BLOCK_TAGS).length === 0;\n\n if (convertable) {\n convertNodeTo($div, $, 'p');\n }\n });\n\n return $;\n}\n\nfunction convertSpans($) {\n $('span').each((index, span) => {\n const $span = $(span);\n const convertable = $span.parents('p, div').length === 0;\n if (convertable) {\n convertNodeTo($span, $, 'p');\n }\n });\n\n return $;\n}\n\n// Loop through the provided doc, and convert any p-like elements to\n// actual paragraph tags.\n//\n// Things fitting this criteria:\n// * Multiple consecutive tags.\n// * tags without block level elements inside of them\n// * tags who are not children of or tags.\n//\n// :param $: A cheerio object to search\n// :return cheerio object with new p elements\n// (By-reference mutation, though. 
Returned just for convenience.)\n\nexport default function convertToParagraphs($) {\n $ = brsToPs($);\n $ = convertDivs($);\n $ = convertSpans($);\n\n return $;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function convertNodeTo($node, $, tag = 'p') {\n const node = $node.get(0);\n if (!node) {\n return $;\n }\n const attrs = getAttrs(node) || {};\n // console.log(attrs)\n\n const attribString = Reflect.ownKeys(attrs)\n .map(key => `${key}=${attrs[key]}`)\n .join(' ');\n let html;\n\n if ($.browser) {\n // In the browser, the contents of noscript tags aren't rendered, therefore\n // transforms on the noscript tag (commonly used for lazy-loading) don't work\n // as expected. This test case handles that\n html = node.tagName.toLowerCase() === 'noscript' ? $node.text() : $node.html();\n } else {\n html = $node.contents();\n }\n $node.replaceWith(\n `<${tag} ${attribString}>${html}${tag}>`\n );\n return $;\n}\n","import { SPACER_RE } from './constants';\n\nfunction cleanForHeight($img, $) {\n const height = parseInt($img.attr('height'), 10);\n const width = parseInt($img.attr('width'), 10) || 20;\n\n // Remove images that explicitly have very small heights or\n // widths, because they are most likely shims or icons,\n // which aren't very useful for reading.\n if ((height || 20) < 10 || width < 10) {\n $img.remove();\n } else if (height) {\n // Don't ever specify a height on images, so that we can\n // scale with respect to width without screwing up the\n // aspect ratio.\n $img.removeAttr('height');\n }\n\n return $;\n}\n\n// Cleans out images where the source string matches transparent/spacer/etc\n// TODO This seems very aggressive - AP\nfunction removeSpacers($img, $) {\n if (SPACER_RE.test($img.attr('src'))) {\n $img.remove();\n }\n\n return $;\n}\n\nexport default function cleanImages($article, $) {\n $article.find('img').each((index, img) => {\n const $img = $(img);\n\n cleanForHeight($img, $);\n removeSpacers($img, $);\n });\n\n return 
$;\n}\n","import URL from 'url';\n\nimport {\n KEEP_SELECTORS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function markToKeep(article, $, url, tags = []) {\n if (tags.length === 0) {\n tags = KEEP_SELECTORS;\n }\n\n if (url) {\n const { protocol, hostname } = URL.parse(url);\n tags = [...tags, `iframe[src^=\"${protocol}//${hostname}\"]`];\n }\n\n $(tags.join(','), article).addClass(KEEP_CLASS);\n\n return $;\n}\n","import {\n STRIP_OUTPUT_TAGS,\n KEEP_CLASS,\n} from './constants';\n\nexport default function stripJunkTags(article, $, tags = []) {\n if (tags.length === 0) {\n tags = STRIP_OUTPUT_TAGS;\n }\n\n // Remove matching elements, but ignore\n // any element with a class of mercury-parser-keep\n $(tags.join(','), article).not(`.${KEEP_CLASS}`).remove();\n\n // Remove the mercury-parser-keep class from result\n $(`.${KEEP_CLASS}`, article).removeClass(KEEP_CLASS);\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// H1 tags are typically the article title, which should be extracted\n// by the title extractor instead. If there's less than 3 of them (<3),\n// strip them. 
Otherwise, turn 'em into H2s.\nexport default function cleanHOnes(article, $) {\n const $hOnes = $('h1', article);\n\n if ($hOnes.length < 3) {\n $hOnes.each((index, node) => $(node).remove());\n } else {\n $hOnes.each((index, node) => {\n convertNodeTo($(node), $, 'h2');\n });\n }\n\n return $;\n}\n","import {\n getAttrs,\n setAttrs,\n} from 'utils/dom';\n\nimport { WHITELIST_ATTRS_RE } from './constants';\n\nfunction removeAllButWhitelist($article) {\n $article.find('*').each((index, node) => {\n const attrs = getAttrs(node);\n\n setAttrs(node, Reflect.ownKeys(attrs).reduce((acc, attr) => {\n if (WHITELIST_ATTRS_RE.test(attr)) {\n return { ...acc, [attr]: attrs[attr] };\n }\n\n return acc;\n }, {}));\n });\n\n return $article;\n}\n\n// function removeAttrs(article, $) {\n// REMOVE_ATTRS.forEach((attr) => {\n// $(`[${attr}]`, article).removeAttr(attr);\n// });\n// }\n\n// Remove attributes like style or align\nexport default function cleanAttributes($article) {\n // Grabbing the parent because at this point\n // $article will be wrapped in a div which will\n // have a score set on it.\n return removeAllButWhitelist(\n $article.parent().length ?\n $article.parent() : $article\n );\n}\n","export default function removeEmpty($article, $) {\n $article.find('p').each((index, p) => {\n const $p = $(p);\n if ($p.find('iframe, img').length === 0 && $p.text().trim() === '') $p.remove();\n });\n\n return $;\n}\n","// // CONTENT FETCHING CONSTANTS ////\n\n// A list of strings that can be considered unlikely candidates when\n// extracting content from a resource. 
These strings are joined together\n// and then tested for existence using re:test, so may contain simple,\n// non-pipe style regular expression queries if necessary.\nexport const UNLIKELY_CANDIDATES_BLACKLIST = [\n 'ad-break',\n 'adbox',\n 'advert',\n 'addthis',\n 'agegate',\n 'aux',\n 'blogger-labels',\n 'combx',\n 'comment',\n 'conversation',\n 'disqus',\n 'entry-unrelated',\n 'extra',\n 'foot',\n 'form',\n 'header',\n 'hidden',\n 'loader',\n 'login', // Note: This can hit 'blogindex'.\n 'menu',\n 'meta',\n 'nav',\n 'pager',\n 'pagination',\n 'predicta', // readwriteweb inline ad box\n 'presence_control_external', // lifehacker.com container full of false positives\n 'popup',\n 'printfriendly',\n 'related',\n 'remove',\n 'remark',\n 'rss',\n 'share',\n 'shoutbox',\n 'sidebar',\n 'sociable',\n 'sponsor',\n 'tools',\n];\n\n// A list of strings that can be considered LIKELY candidates when\n// extracting content from a resource. Essentially, the inverse of the\n// blacklist above - if something matches both blacklist and whitelist,\n// it is kept. This is useful, for example, if something has a className\n// of \"rss-content entry-content\". It matched 'rss', so it would normally\n// be removed, however, it's also the entry content, so it should be left\n// alone.\n//\n// These strings are joined together and then tested for existence using\n// re:test, so may contain simple, non-pipe style regular expression queries\n// if necessary.\nexport const UNLIKELY_CANDIDATES_WHITELIST = [\n 'and',\n 'article',\n 'body',\n 'blogindex',\n 'column',\n 'content',\n 'entry-content-asset',\n 'format', // misuse of form\n 'hfeed',\n 'hentry',\n 'hatom',\n 'main',\n 'page',\n 'posts',\n 'shadow',\n];\n\n// A list of tags which, if found inside, should cause a to NOT\n// be turned into a paragraph tag. 
Shallow div tags without these elements\n// should be turned into tags.\nexport const DIV_TO_P_BLOCK_TAGS = [\n 'a',\n 'blockquote',\n 'dl',\n 'div',\n 'img',\n 'p',\n 'pre',\n 'table',\n].join(',');\n\n// A list of tags that should be ignored when trying to find the top candidate\n// for a document.\nexport const NON_TOP_CANDIDATE_TAGS = [\n 'br',\n 'b',\n 'i',\n 'label',\n 'hr',\n 'area',\n 'base',\n 'basefont',\n 'input',\n 'img',\n 'link',\n 'meta',\n];\n\nexport const NON_TOP_CANDIDATE_TAGS_RE =\n new RegExp(`^(${NON_TOP_CANDIDATE_TAGS.join('|')})$`, 'i');\n\n// A list of selectors that specify, very clearly, either hNews or other\n// very content-specific style content, like Blogger templates.\n// More examples here: http://microformats.org/wiki/blog-post-formats\nexport const HNEWS_CONTENT_SELECTORS = [\n ['.hentry', '.entry-content'],\n ['entry', '.entry-content'],\n ['.entry', '.entry_content'],\n ['.post', '.postbody'],\n ['.post', '.post_body'],\n ['.post', '.post-body'],\n];\n\nexport const PHOTO_HINTS = [\n 'figure',\n 'photo',\n 'image',\n 'caption',\n];\nexport const PHOTO_HINTS_RE = new RegExp(PHOTO_HINTS.join('|'), 'i');\n\n// A list of strings that denote a positive scoring for this content as being\n// an article container. 
Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const POSITIVE_SCORE_HINTS = [\n 'article',\n 'articlecontent',\n 'instapaper_body',\n 'blog',\n 'body',\n 'content',\n 'entry-content-asset',\n 'entry',\n 'hentry',\n 'main',\n 'Normal',\n 'page',\n 'pagination',\n 'permalink',\n 'post',\n 'story',\n 'text',\n '[-_]copy', // usatoday\n '\\\\Bcopy',\n];\n\n// The above list, joined into a matching regular expression\nexport const POSITIVE_SCORE_RE = new RegExp(POSITIVE_SCORE_HINTS.join('|'), 'i');\n\n// Readability publisher-specific guidelines\nexport const READABILITY_ASSET = new RegExp('entry-content-asset', 'i');\n\n// A list of strings that denote a negative scoring for this content as being\n// an article container. Checked against className and id.\n//\n// TODO: Perhaps have these scale based on their odds of being quality?\nexport const NEGATIVE_SCORE_HINTS = [\n 'adbox',\n 'advert',\n 'author',\n 'bio',\n 'bookmark',\n 'bottom',\n 'byline',\n 'clear',\n 'com-',\n 'combx',\n 'comment',\n 'comment\\\\B',\n 'contact',\n 'copy',\n 'credit',\n 'crumb',\n 'date',\n 'deck',\n 'excerpt',\n 'featured', // tnr.com has a featured_content which throws us off\n 'foot',\n 'footer',\n 'footnote',\n 'graf',\n 'head',\n 'info',\n 'infotext', // newscientist.com copyright\n 'instapaper_ignore',\n 'jump',\n 'linebreak',\n 'link',\n 'masthead',\n 'media',\n 'meta',\n 'modal',\n 'outbrain', // slate.com junk\n 'promo',\n 'pr_', // autoblog - press release\n 'related',\n 'respond',\n 'roundcontent', // lifehacker restricted content warning\n 'scroll',\n 'secondary',\n 'share',\n 'shopping',\n 'shoutbox',\n 'side',\n 'sidebar',\n 'sponsor',\n 'stamp',\n 'sub',\n 'summary',\n 'tags',\n 'tools',\n 'widget',\n];\n// The above list, joined into a matching regular expression\nexport const NEGATIVE_SCORE_RE = new RegExp(NEGATIVE_SCORE_HINTS.join('|'), 'i');\n\n// Match a digit. 
Pretty clear.\nexport const DIGIT_RE = new RegExp('[0-9]');\n\n// Match 2 or more consecutive tags\nexport const BR_TAGS_RE = new RegExp('( ]*>[ \\n\\r\\t]*){2,}', 'i');\n\n// Match 1 BR tag.\nexport const BR_TAG_RE = new RegExp(' ]*>', 'i');\n\n// A list of all of the block level tags known in HTML5 and below. Taken from\n// http://bit.ly/qneNIT\nexport const BLOCK_LEVEL_TAGS = [\n 'article',\n 'aside',\n 'blockquote',\n 'body',\n 'br',\n 'button',\n 'canvas',\n 'caption',\n 'col',\n 'colgroup',\n 'dd',\n 'div',\n 'dl',\n 'dt',\n 'embed',\n 'fieldset',\n 'figcaption',\n 'figure',\n 'footer',\n 'form',\n 'h1',\n 'h2',\n 'h3',\n 'h4',\n 'h5',\n 'h6',\n 'header',\n 'hgroup',\n 'hr',\n 'li',\n 'map',\n 'object',\n 'ol',\n 'output',\n 'p',\n 'pre',\n 'progress',\n 'section',\n 'table',\n 'tbody',\n 'textarea',\n 'tfoot',\n 'th',\n 'thead',\n 'tr',\n 'ul',\n 'video',\n];\nexport const BLOCK_LEVEL_TAGS_RE = new RegExp(`^(${BLOCK_LEVEL_TAGS.join('|')})$`, 'i');\n\n// The removal is implemented as a blacklist and whitelist, this test finds\n// blacklisted elements that aren't whitelisted. 
We do this all in one\n// expression-both because it's only one pass, and because this skips the\n// serialization for whitelisted nodes.\nconst candidatesBlacklist = UNLIKELY_CANDIDATES_BLACKLIST.join('|');\nexport const CANDIDATES_BLACKLIST = new RegExp(candidatesBlacklist, 'i');\n\nconst candidatesWhitelist = UNLIKELY_CANDIDATES_WHITELIST.join('|');\nexport const CANDIDATES_WHITELIST = new RegExp(candidatesWhitelist, 'i');\n\nexport const UNLIKELY_RE = new RegExp(`!(${candidatesWhitelist})|(${candidatesBlacklist})`, 'i');\n\nexport const PARAGRAPH_SCORE_TAGS = new RegExp('^(p|li|span|pre)$', 'i');\nexport const CHILD_CONTENT_TAGS = new RegExp('^(td|blockquote|ol|ul|dl)$', 'i');\nexport const BAD_TAGS = new RegExp('^(address|form)$', 'i');\n\nexport const HTML_OR_BODY_RE = new RegExp('^(html|body)$', 'i');\n","import {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PHOTO_HINTS_RE,\n READABILITY_ASSET,\n} from './constants';\n\n// Get the score of a node based on its className and id.\nexport default function getWeight(node) {\n const classes = node.attr('class');\n const id = node.attr('id');\n let score = 0;\n\n if (id) {\n // if id exists, try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(id)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(id)) {\n score -= 25;\n }\n }\n\n if (classes) {\n if (score === 0) {\n // if classes exist and id did not contribute to score\n // try to score on both positive and negative\n if (POSITIVE_SCORE_RE.test(classes)) {\n score += 25;\n }\n if (NEGATIVE_SCORE_RE.test(classes)) {\n score -= 25;\n }\n }\n\n // even if score has been set by id, add score for\n // possible photo matches\n // \"try to keep photos if we can\"\n if (PHOTO_HINTS_RE.test(classes)) {\n score += 10;\n }\n\n // add 25 if class matches entry-content-asset,\n // a class apparently instructed for use in the\n // Readability publisher guidelines\n // https://www.readability.com/developers/guidelines\n if (READABILITY_ASSET.test(classes)) 
{\n score += 25;\n }\n }\n\n return score;\n}\n","// returns the score of a node based on\n// the node's score attribute\n// returns null if no score set\nexport default function getScore($node) {\n return parseFloat($node.attr('score')) || null;\n}\n","// return 1 for every comma in text\nexport default function scoreCommas(text) {\n return (text.match(/,/g) || []).length;\n}\n","const idkRe = new RegExp('^(p|pre)$', 'i');\n\nexport default function scoreLength(textLength, tagName = 'p') {\n const chunks = textLength / 50;\n\n if (chunks > 0) {\n let lengthBonus;\n\n // No idea why p or pre are being tamped down here\n // but just following the source for now\n // Not even sure why tagName is included here,\n // since this is only being called from the context\n // of scoreParagraph\n if (idkRe.test(tagName)) {\n lengthBonus = chunks - 2;\n } else {\n lengthBonus = chunks - 1.25;\n }\n\n return Math.min(Math.max(lengthBonus, 0), 3);\n }\n\n return 0;\n}\n","import {\n scoreCommas,\n scoreLength,\n} from './index';\n\n// Score a paragraph using various methods. Things like number of\n// commas, etc. Higher is better.\nexport default function scoreParagraph(node) {\n let score = 1;\n const text = node.text().trim();\n const textLength = text.length;\n\n // If this paragraph is less than 25 characters, don't count it.\n if (textLength < 25) {\n return 0;\n }\n\n // Add points for any commas within this paragraph\n score += scoreCommas(text);\n\n // For every 50 characters in this paragraph, add another point. Up\n // to 3 points.\n score += scoreLength(textLength);\n\n // Articles can end with short paragraphs when people are being clever\n // but they can also end with short paragraphs setting up lists of junk\n // that we strip. 
This negative tweaks junk setup paragraphs just below\n // the cutoff threshold.\n if (text.slice(-1) === ':') {\n score -= 1;\n }\n\n return score;\n}\n","export default function setScore($node, $, score) {\n $node.attr('score', score);\n return $node;\n}\n","import {\n getOrInitScore,\n setScore,\n} from './index';\n\nexport default function addScore($node, $, amount) {\n try {\n const score = getOrInitScore($node, $) + amount;\n setScore($node, $, score);\n } catch (e) {\n // Ignoring; error occurs in scoreNode\n }\n\n return $node;\n}\n","import { addScore } from './index';\n\n// Adds 1/4 of a child's score to its parent\nexport default function addToParent(node, $, score) {\n const parent = node.parent();\n if (parent) {\n addScore(parent, $, score * 0.25);\n }\n\n return node;\n}\n","import {\n getScore,\n scoreNode,\n getWeight,\n addToParent,\n} from './index';\n\n// gets and returns the score if it exists\n// if not, initializes a score based on\n// the node's tag type\nexport default function getOrInitScore($node, $, weightNodes = true) {\n let score = getScore($node);\n\n if (score) {\n return score;\n }\n\n score = scoreNode($node);\n\n if (weightNodes) {\n score += getWeight($node);\n }\n\n addToParent($node, $, score);\n\n return score;\n}\n","import { scoreParagraph } from './index';\nimport {\n PARAGRAPH_SCORE_TAGS,\n CHILD_CONTENT_TAGS,\n BAD_TAGS,\n} from './constants';\n\n// Score an individual node. 
Has some smarts for paragraphs, otherwise\n// just scores based on tag.\nexport default function scoreNode($node) {\n const { tagName } = $node.get(0);\n\n // TODO: Consider ordering by most likely.\n // E.g., if divs are a more common tag on a page,\n // Could save doing that regex test on every node – AP\n if (PARAGRAPH_SCORE_TAGS.test(tagName)) {\n return scoreParagraph($node);\n } else if (tagName.toLowerCase() === 'div') {\n return 5;\n } else if (CHILD_CONTENT_TAGS.test(tagName)) {\n return 3;\n } else if (BAD_TAGS.test(tagName)) {\n return -3;\n } else if (tagName.toLowerCase() === 'th') {\n return -5;\n }\n\n return 0;\n}\n","import { convertNodeTo } from 'utils/dom';\n\nimport { HNEWS_CONTENT_SELECTORS } from './constants';\nimport {\n scoreNode,\n setScore,\n getOrInitScore,\n addScore,\n} from './index';\n\nfunction convertSpans($node, $) {\n if ($node.get(0)) {\n const { tagName } = $node.get(0);\n\n if (tagName === 'span') {\n // convert spans to divs\n convertNodeTo($node, $, 'div');\n }\n }\n}\n\nfunction addScoreTo($node, $, score) {\n if ($node) {\n convertSpans($node, $);\n addScore($node, $, score);\n }\n}\n\nfunction scorePs($, weightNodes) {\n $('p, pre').not('[score]').each((index, node) => {\n // The raw score for this paragraph, before we add any parent/child\n // scores.\n let $node = $(node);\n $node = setScore($node, $, getOrInitScore($node, $, weightNodes));\n\n const $parent = $node.parent();\n const rawScore = scoreNode($node);\n\n addScoreTo($parent, $, rawScore, weightNodes);\n if ($parent) {\n // Add half of the individual content score to the\n // grandparent\n addScoreTo($parent.parent(), $, rawScore / 2, weightNodes);\n }\n });\n\n return $;\n}\n\n// score content. 
Parents get the full value of their children's\n// content score, grandparents half\nexport default function scoreContent($, weightNodes = true) {\n // First, look for special hNews based selectors and give them a big\n // boost, if they exist\n HNEWS_CONTENT_SELECTORS.forEach(([parentSelector, childSelector]) => {\n $(`${parentSelector} ${childSelector}`).each((index, node) => {\n addScore($(node).parent(parentSelector), $, 80);\n });\n });\n\n // Doubling this again\n // Previous solution caused a bug\n // in which parents weren't retaining\n // scores. This is not ideal, and\n // should be fixed.\n scorePs($, weightNodes);\n scorePs($, weightNodes);\n\n return $;\n}\n","const NORMALIZE_RE = /\\s{2,}/g;\n\nexport default function normalizeSpaces(text) {\n return text.replace(NORMALIZE_RE, ' ').trim();\n}\n","// Given a node type to search for, and a list of regular expressions,\n// look to see if this extraction can be found in the URL. Expects\n// that each expression in r_list will return group(1) as the proper\n// string to be cleaned.\n// Only used for date_published currently.\nexport default function extractFromUrl(url, regexList) {\n const matchRe = regexList.find(re => re.test(url));\n if (matchRe) {\n return matchRe.exec(url)[1];\n }\n\n return null;\n}\n","// An expression that looks to try to find the page digit within a URL, if\n// it exists.\n// Matches:\n// page=1\n// pg=1\n// p=1\n// paging=12\n// pag=7\n// pagination/1\n// paging/88\n// pa/83\n// p/11\n//\n// Does not match:\n// pg=102\n// page:2\nexport const PAGE_IN_HREF_RE = new RegExp('(page|paging|(p(a|g|ag)?(e|enum|ewanted|ing|ination)))?(=|/)([0-9]{1,3})', 'i');\n\nexport const HAS_ALPHA_RE = /[a-z]/i;\n\nexport const IS_ALPHA_RE = /^[a-z]+$/i;\nexport const IS_DIGIT_RE = /^[0-9]+$/i;\n","import { PAGE_IN_HREF_RE } from './constants';\n\nexport default function pageNumFromUrl(url) {\n const matches = url.match(PAGE_IN_HREF_RE);\n if (!matches) return null;\n\n const pageNum = 
parseInt(matches[6], 10);\n\n // Return pageNum < 100, otherwise\n // return null\n return pageNum < 100 ? pageNum : null;\n}\n","export default function removeAnchor(url) {\n return url.split('#')[0].replace(/\\/$/, '');\n}\n","import URL from 'url';\n\nimport {\n HAS_ALPHA_RE,\n IS_ALPHA_RE,\n IS_DIGIT_RE,\n PAGE_IN_HREF_RE,\n} from './constants';\n\nfunction isGoodSegment(segment, index, firstSegmentHasLetters) {\n let goodSegment = true;\n\n // If this is purely a number, and it's the first or second\n // url_segment, it's probably a page number. Remove it.\n if (index < 2 && IS_DIGIT_RE.test(segment) && segment.length < 3) {\n goodSegment = true;\n }\n\n // If this is the first url_segment and it's just \"index\",\n // remove it\n if (index === 0 && segment.toLowerCase() === 'index') {\n goodSegment = false;\n }\n\n // If our first or second url_segment is smaller than 3 characters,\n // and the first url_segment had no alphas, remove it.\n if (index < 2 && segment.length < 3 && !firstSegmentHasLetters) {\n goodSegment = false;\n }\n\n return goodSegment;\n}\n\n// Take a URL, and return the article base of said URL. That is, no\n// pagination data exists in it. 
Useful for comparing to other links\n// that might have pagination data within them.\nexport default function articleBaseUrl(url, parsed) {\n const parsedUrl = parsed || URL.parse(url);\n const { protocol, host, path } = parsedUrl;\n\n let firstSegmentHasLetters = false;\n const cleanedSegments = path.split('/')\n .reverse()\n .reduce((acc, rawSegment, index) => {\n let segment = rawSegment;\n\n // Split off and save anything that looks like a file type.\n if (segment.includes('.')) {\n const [possibleSegment, fileExt] = segment.split('.');\n if (IS_ALPHA_RE.test(fileExt)) {\n segment = possibleSegment;\n }\n }\n\n // If our first or second segment has anything looking like a page\n // number, remove it.\n if (PAGE_IN_HREF_RE.test(segment) && index < 2) {\n segment = segment.replace(PAGE_IN_HREF_RE, '');\n }\n\n // If we're on the first segment, check to see if we have any\n // characters in it. The first segment is actually the last bit of\n // the URL, and this will be helpful to determine if we're on a URL\n // segment that looks like \"/2/\" for example.\n if (index === 0) {\n firstSegmentHasLetters = HAS_ALPHA_RE.test(segment);\n }\n\n // If it's not marked for deletion, push it to cleaned_segments.\n if (isGoodSegment(segment, index, firstSegmentHasLetters)) {\n acc.push(segment);\n }\n\n return acc;\n }, []);\n\n return `${protocol}//${host}${cleanedSegments.reverse().join('/')}`;\n}\n","// Given a string, return True if it appears to have an ending sentence\n// within it, false otherwise.\nconst SENTENCE_END_RE = new RegExp('.( |$)');\nexport default function hasSentenceEnd(text) {\n return SENTENCE_END_RE.test(text);\n}\n","export default function excerptContent(content, words = 10) {\n return content.trim()\n .split(/\\s+/)\n .slice(0, words)\n .join(' ');\n}\n","import {\n textLength,\n linkDensity,\n} from 'utils/dom';\nimport { hasSentenceEnd } from 'utils/text';\n\nimport { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from 
'./index';\n\n// Now that we have a top_candidate, look through the siblings of\n// it to see if any of them are decently scored. If they are, they\n// may be split parts of the content (Like two divs, a preamble and\n// a body.) Example:\n// http://articles.latimes.com/2009/oct/14/business/fi-bigtvs14\nexport default function mergeSiblings($candidate, topScore, $) {\n if (!$candidate.parent().length) {\n return $candidate;\n }\n\n const siblingScoreThreshold = Math.max(10, topScore * 0.25);\n const wrappingDiv = $('');\n\n $candidate.parent().children().each((index, sibling) => {\n const $sibling = $(sibling);\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(sibling.tagName)) {\n return null;\n }\n\n const siblingScore = getScore($sibling);\n if (siblingScore) {\n if ($sibling.get(0) === $candidate.get(0)) {\n wrappingDiv.append($sibling);\n } else {\n let contentBonus = 0;\n const density = linkDensity($sibling);\n\n // If sibling has a very low link density,\n // give it a small bonus\n if (density < 0.05) {\n contentBonus += 20;\n }\n\n // If sibling has a high link density,\n // give it a penalty\n if (density >= 0.5) {\n contentBonus -= 20;\n }\n\n // If sibling node has the same class as\n // candidate, give it a bonus\n if ($sibling.attr('class') === $candidate.attr('class')) {\n contentBonus += topScore * 0.2;\n }\n\n const newScore = siblingScore + contentBonus;\n\n if (newScore >= siblingScoreThreshold) {\n return wrappingDiv.append($sibling);\n } else if (sibling.tagName === 'p') {\n const siblingContent = $sibling.text();\n const siblingContentLength = textLength(siblingContent);\n\n if (siblingContentLength > 80 && density < 0.25) {\n return wrappingDiv.append($sibling);\n } else if (siblingContentLength <= 80 && density === 0 &&\n hasSentenceEnd(siblingContent)) {\n return wrappingDiv.append($sibling);\n }\n }\n }\n }\n\n return null;\n });\n\n if (wrappingDiv.children().length === 1 &&\n wrappingDiv.children().first().get(0) 
=== $candidate.get(0)) {\n return $candidate;\n }\n\n return wrappingDiv;\n}\n","import { NON_TOP_CANDIDATE_TAGS_RE } from './constants';\nimport { getScore } from './index';\nimport mergeSiblings from './merge-siblings';\n\n// After we've calculated scores, loop through all of the possible\n// candidate nodes we found and find the one with the highest score.\nexport default function findTopCandidate($) {\n let $candidate;\n let topScore = 0;\n\n $('[score]').each((index, node) => {\n // Ignore tags like BR, HR, etc\n if (NON_TOP_CANDIDATE_TAGS_RE.test(node.tagName)) {\n return;\n }\n\n const $node = $(node);\n const score = getScore($node);\n\n if (score > topScore) {\n topScore = score;\n $candidate = $node;\n }\n });\n\n // If we don't have a candidate, return the body\n // or whatever the first element is\n if (!$candidate) {\n return $('body') || $('*').first();\n }\n\n $candidate = mergeSiblings($candidate, topScore, $);\n\n return $candidate;\n}\n","// Scoring\nexport { default as getWeight } from './get-weight';\nexport { default as getScore } from './get-score';\nexport { default as scoreCommas } from './score-commas';\nexport { default as scoreLength } from './score-length';\nexport { default as scoreParagraph } from './score-paragraph';\nexport { default as setScore } from './set-score';\nexport { default as addScore } from './add-score';\nexport { default as addToParent } from './add-to-parent';\nexport { default as getOrInitScore } from './get-or-init-score';\nexport { default as scoreNode } from './score-node';\nexport { default as scoreContent } from './score-content';\nexport { default as findTopCandidate } from './find-top-candidate';\n","import {\n getScore,\n setScore,\n getOrInitScore,\n scoreCommas,\n} from 'extractors/generic/content/scoring';\n\nimport { CLEAN_CONDITIONALLY_TAGS } from './constants';\nimport { normalizeSpaces } from '../text';\nimport { linkDensity } from './index';\n\nfunction removeUnlessContent($node, $, weight) {\n // 
Explicitly save entry-content-asset tags, which are\n // noted as valuable in the Publisher guidelines. For now\n // this works everywhere. We may want to consider making\n // this less of a sure-thing later.\n if ($node.hasClass('entry-content-asset')) {\n return;\n }\n\n const content = normalizeSpaces($node.text());\n\n if (scoreCommas(content) < 10) {\n const pCount = $('p', $node).length;\n const inputCount = $('input', $node).length;\n\n // Looks like a form, too many inputs.\n if (inputCount > (pCount / 3)) {\n $node.remove();\n return;\n }\n\n const contentLength = content.length;\n const imgCount = $('img', $node).length;\n\n // Content is too short, and there are no images, so\n // this is probably junk content.\n if (contentLength < 25 && imgCount === 0) {\n $node.remove();\n return;\n }\n\n const density = linkDensity($node);\n\n // Too high of link density, is probably a menu or\n // something similar.\n // console.log(weight, density, contentLength)\n if (weight < 25 && density > 0.2 && contentLength > 75) {\n $node.remove();\n return;\n }\n\n // Too high of a link density, despite the score being\n // high.\n if (weight >= 25 && density > 0.5) {\n // Don't remove the node if it's a list and the\n // previous sibling starts with a colon though. That\n // means it's probably content.\n const tagName = $node.get(0).tagName.toLowerCase();\n const nodeIsList = tagName === 'ol' || tagName === 'ul';\n if (nodeIsList) {\n const previousNode = $node.prev();\n if (previousNode && normalizeSpaces(previousNode.text()).slice(-1) === ':') {\n return;\n }\n }\n\n $node.remove();\n return;\n }\n\n const scriptCount = $('script', $node).length;\n\n // Too many script tags, not enough content.\n if (scriptCount > 0 && contentLength < 150) {\n $node.remove();\n return;\n }\n }\n}\n\n// Given an article, clean it of some superfluous content specified by\n// tags. Things like forms, ads, etc.\n//\n// Tags is an array of tag name's to search through. 
(like div, form,\n// etc)\n//\n// Return this same doc.\nexport default function cleanTags($article, $) {\n $(CLEAN_CONDITIONALLY_TAGS, $article).each((index, node) => {\n const $node = $(node);\n let weight = getScore($node);\n if (!weight) {\n weight = getOrInitScore($node, $);\n setScore($node, $, weight);\n }\n\n // drop node if its weight is < 0\n if (weight < 0) {\n $node.remove();\n } else {\n // deteremine if node seems like content\n removeUnlessContent($node, $, weight);\n }\n });\n\n return $;\n}\n","import { getWeight } from 'extractors/generic/content/scoring';\n\nimport { HEADER_TAG_LIST } from './constants';\nimport { normalizeSpaces } from '../text';\n\nexport default function cleanHeaders($article, $, title = '') {\n $(HEADER_TAG_LIST, $article).each((index, header) => {\n const $header = $(header);\n // Remove any headers that appear before all other p tags in the\n // document. This probably means that it was part of the title, a\n // subtitle or something else extraneous like a datestamp or byline,\n // all of which should be handled by other metadata handling.\n if ($($header, $article).prevAll('p').length === 0) {\n return $header.remove();\n }\n\n // Remove any headers that match the title exactly.\n if (normalizeSpaces($(header).text()) === title) {\n return $header.remove();\n }\n\n // If this header has a negative weight, it's probably junk.\n // Get rid of it.\n if (getWeight($(header)) < 0) {\n return $header.remove();\n }\n\n return $header;\n });\n\n return $;\n}\n","import { convertNodeTo } from 'utils/dom';\n\n// Rewrite the tag name to div if it's a top level node like body or\n// html to avoid later complications with multiple body tags.\nexport default function rewriteTopLevel(article, $) {\n // I'm not using context here because\n // it's problematic when converting the\n // top-level/root node - AP\n $ = convertNodeTo($('html'), $, 'div');\n $ = convertNodeTo($('body'), $, 'div');\n\n return $;\n}\n","import URL from 
'url';\n\nimport {\n getAttrs,\n setAttr,\n} from 'utils/dom';\n\nfunction absolutize($, rootUrl, attr, $content) {\n $(`[${attr}]`, $content).each((_, node) => {\n const attrs = getAttrs(node);\n const url = attrs[attr];\n\n if (url) {\n const absoluteUrl = URL.resolve(rootUrl, url);\n setAttr(node, attr, absoluteUrl);\n }\n });\n}\n\nexport default function makeLinksAbsolute($content, $, url) {\n ['href', 'src'].forEach(attr => absolutize($, url, attr, $content));\n\n return $content;\n}\n","export function textLength(text) {\n return text.trim()\n .replace(/\\s+/g, ' ')\n .length;\n}\n\n// Determines what percentage of the text\n// in a node is link text\n// Takes a node, returns a float\nexport function linkDensity($node) {\n const totalTextLength = textLength($node.text());\n\n const linkText = $node.find('a').text();\n const linkLength = textLength(linkText);\n\n if (totalTextLength > 0) {\n return linkLength / totalTextLength;\n } else if (totalTextLength === 0 && linkLength > 0) {\n return 1;\n }\n\n return 0;\n}\n","import { stripTags } from 'utils/dom';\n\n// Given a node type to search for, and a list of meta tag names to\n// search for, find a meta tag associated.\nexport default function extractFromMeta(\n $,\n metaNames,\n cachedNames,\n cleanTags = true\n) {\n const foundNames = metaNames.filter(name => cachedNames.indexOf(name) !== -1);\n\n for (const name of foundNames) {\n const type = 'name';\n const value = 'value';\n\n const nodes = $(`meta[${type}=\"${name}\"]`);\n\n // Get the unique value of every matching node, in case there\n // are two meta tags with the same name and value.\n // Remove empty values.\n const values =\n nodes.map((index, node) => $(node).attr(value))\n .toArray()\n .filter(text => text !== '');\n\n // If we have more than one value for the same name, we have a\n // conflict and can't trust any of them. Skip this name. If we have\n // zero, that means our meta tags had no values. 
Skip this name\n // also.\n if (values.length === 1) {\n let metaValue;\n // Meta values that contain HTML should be stripped, as they\n // weren't subject to cleaning previously.\n if (cleanTags) {\n metaValue = stripTags(values[0], $);\n } else {\n metaValue = values[0];\n }\n\n return metaValue;\n }\n }\n\n // If nothing is found, return null\n return null;\n}\n","import { withinComment } from 'utils/dom';\n\nfunction isGoodNode($node, maxChildren) {\n // If it has a number of children, it's more likely a container\n // element. Skip it.\n if ($node.children().length > maxChildren) {\n return false;\n }\n // If it looks to be within a comment, skip it.\n if (withinComment($node)) {\n return false;\n }\n\n return true;\n}\n\n// Given a a list of selectors find content that may\n// be extractable from the document. This is for flat\n// meta-information, like author, title, date published, etc.\nexport default function extractFromSelectors(\n $,\n selectors,\n maxChildren = 1,\n textOnly = true\n) {\n for (const selector of selectors) {\n const nodes = $(selector);\n\n // If we didn't get exactly one of this selector, this may be\n // a list of articles or comments. Skip it.\n if (nodes.length === 1) {\n const $node = $(nodes[0]);\n\n if (isGoodNode($node, maxChildren)) {\n let content;\n if (textOnly) {\n content = $node.text();\n } else {\n content = $node.html();\n }\n\n if (content) {\n return content;\n }\n }\n }\n }\n\n return null;\n}\n","// strips all tags from a string of text\nexport default function stripTags(text, $) {\n // Wrapping text in html element prevents errors when text\n // has no html\n const cleanText = $(`${text}`).text();\n return cleanText === '' ? 
text : cleanText;\n}\n","import { getAttrs } from 'utils/dom';\n\nexport default function withinComment($node) {\n const parents = $node.parents().toArray();\n const commentParent = parents.find((parent) => {\n const attrs = getAttrs(parent);\n const { class: nodeClass, id } = attrs;\n const classAndId = `${nodeClass} ${id}`;\n return classAndId.includes('comment');\n });\n\n return commentParent !== undefined;\n}\n","// Given a node, determine if it's article-like enough to return\n// param: node (a cheerio node)\n// return: boolean\n\nexport default function nodeIsSufficient($node) {\n return $node.text().trim().length >= 100;\n}\n","import { IS_WP_SELECTOR } from './constants';\n\nexport default function isWordpress($) {\n return $(IS_WP_SELECTOR).length > 0;\n}\n","export default function getAttrs(node) {\n const { attribs, attributes } = node;\n\n if (!attribs && attributes) {\n const attrs = Reflect.ownKeys(attributes).reduce((acc, index) => {\n const attr = attributes[index];\n\n if (!attr.name || !attr.value) return acc;\n\n acc[attr.name] = attr.value;\n return acc;\n }, {});\n return attrs;\n }\n\n return attribs;\n}\n","export default function setAttr(node, attr, val) {\n if (node.attribs) {\n node.attribs[attr] = val;\n } else if (node.attributes) {\n node.setAttribute(attr, val);\n }\n\n return node;\n}\n","export default function setAttrs(node, attrs) {\n if (node.attribs) {\n node.attribs = attrs;\n } else if (node.attributes) {\n while (node.attributes.length > 0) {\n node.removeAttribute(node.attributes[0].name);\n }\n\n Reflect.ownKeys(attrs).forEach((key) => {\n node.setAttribute(key, attrs[key]);\n });\n }\n\n return node;\n}\n","// DOM manipulation\nexport { default as stripUnlikelyCandidates } from './strip-unlikely-candidates';\nexport { default as brsToPs } from './brs-to-ps';\nexport { default as paragraphize } from './paragraphize';\nexport { default as convertToParagraphs } from './convert-to-paragraphs';\nexport { default as 
convertNodeTo } from './convert-node-to';\nexport { default as cleanImages } from './clean-images';\nexport { default as markToKeep } from './mark-to-keep';\nexport { default as stripJunkTags } from './strip-junk-tags';\nexport { default as cleanHOnes } from './clean-h-ones';\nexport { default as cleanAttributes } from './clean-attributes';\nexport { default as removeEmpty } from './remove-empty';\nexport { default as cleanTags } from './clean-tags';\nexport { default as cleanHeaders } from './clean-headers';\nexport { default as rewriteTopLevel } from './rewrite-top-level';\nexport { default as makeLinksAbsolute } from './make-links-absolute';\nexport { textLength, linkDensity } from './link-density';\nexport { default as extractFromMeta } from './extract-from-meta';\nexport { default as extractFromSelectors } from './extract-from-selectors';\nexport { default as stripTags } from './strip-tags';\nexport { default as withinComment } from './within-comment';\nexport { default as nodeIsSufficient } from './node-is-sufficient';\nexport { default as isWordpress } from './is-wordpress';\nexport { default as getAttrs } from './get-attrs';\nexport { default as setAttr } from './set-attr';\nexport { default as setAttrs } from './set-attrs';\n","export const IS_LINK = new RegExp('https?://', 'i');\nexport const IS_IMAGE = new RegExp('.(png|gif|jpe?g)', 'i');\n\nexport const TAGS_TO_REMOVE = [\n 'script',\n 'style',\n 'form',\n].join(',');\n","import { getAttrs } from 'utils/dom';\n\nimport {\n IS_LINK,\n IS_IMAGE,\n} from './constants';\n\n// Convert all instances of images with potentially\n// lazy loaded images into normal images.\n// Many sites will have img tags with no source, or an image tag with a src\n// attribute that a is a placeholer. 
We need to be able to properly fill in\n// the src attribute so the images are no longer lazy loaded.\nexport default function convertLazyLoadedImages($) {\n $('img').each((_, img) => {\n const attrs = getAttrs(img);\n\n Reflect.ownKeys(attrs).forEach((attr) => {\n const value = attrs[attr];\n\n if (attr !== 'src' && IS_LINK.test(value) &&\n IS_IMAGE.test(value)) {\n $(img).attr('src', value);\n }\n });\n });\n\n return $;\n}\n","import { TAGS_TO_REMOVE } from './constants';\n\nfunction isComment(index, node) {\n return node.type === 'comment';\n}\n\nfunction cleanComments($) {\n $.root().find('*')\n .contents()\n .filter(isComment)\n .remove();\n\n return $;\n}\n\nexport default function clean($) {\n $(TAGS_TO_REMOVE).remove();\n\n $ = cleanComments($);\n return $;\n}\n","import cheerio from 'cheerio';\n\nimport { fetchResource } from './utils';\nimport {\n normalizeMetaTags,\n convertLazyLoadedImages,\n clean,\n} from './utils/dom';\n\nconst Resource = {\n\n // Create a Resource.\n //\n // :param url: The URL for the document we should retrieve.\n // :param response: If set, use as the response rather than\n // attempting to fetch it ourselves. 
Expects a\n // string.\n async create(url, preparedResponse, parsedUrl) {\n let result;\n\n if (preparedResponse) {\n const validResponse = {\n statusMessage: 'OK',\n statusCode: 200,\n headers: {\n 'content-type': 'text/html',\n 'content-length': 500,\n },\n };\n\n result = { body: preparedResponse, response: validResponse };\n } else {\n result = await fetchResource(url, parsedUrl);\n }\n\n if (result.error) {\n result.failed = true;\n return result;\n }\n\n return this.generateDoc(result);\n },\n\n generateDoc({ body: content, response }) {\n const { 'content-type': contentType } = response.headers;\n\n // TODO: Implement is_text function from\n // https://github.com/ReadabilityHoldings/readability/blob/8dc89613241d04741ebd42fa9fa7df1b1d746303/readability/utils/text.py#L57\n if (!contentType.includes('html') &&\n !contentType.includes('text')) {\n throw new Error('Content does not appear to be text.');\n }\n\n let $ = cheerio.load(content, { normalizeWhitespace: true });\n\n if ($.root().children().length === 0) {\n throw new Error('No children, likely a bad parse.');\n }\n\n $ = normalizeMetaTags($);\n $ = convertLazyLoadedImages($);\n $ = clean($);\n\n return $;\n },\n};\n\nexport default Resource;\n","const merge = (extractor, domains) => (\n domains.reduce((acc, domain) => {\n acc[domain] = extractor;\n return acc;\n }, {})\n);\n\nexport default function mergeSupportedDomains(extractor) {\n return extractor.supportedDomains ?\n merge(extractor, [extractor.domain, ...extractor.supportedDomains])\n :\n merge(extractor, [extractor.domain]);\n}\n","export const BloggerExtractor = {\n domain: 'blogspot.com',\n content: {\n // Blogger is insane and does not load its content\n // initially in the page, but it's all there\n // in noscript\n selectors: [\n '.post-content noscript',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n ],\n\n // Convert the noscript tag to a div\n transforms: {\n noscript: 'div',\n },\n },\n\n author: {\n selectors: 
[\n '.post-author-name',\n ],\n },\n\n title: {\n selectors: [\n '.post h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n 'span.publishdate',\n ],\n },\n};\n","export const NYMagExtractor = {\n domain: 'nymag.com',\n content: {\n // Order by most likely. Extractor will stop on first occurrence\n selectors: [\n 'div.article-content',\n 'section.body',\n 'article.article',\n ],\n\n // Selectors to remove from the extracted content\n clean: [\n '.ad',\n '.single-related-story',\n ],\n\n // Object of tranformations to make on matched elements\n // Each key is the selector, each value is the tag to\n // transform to.\n // If a function is given, it should return a string\n // to convert to or nothing (in which case it will not perform\n // the transformation.\n transforms: {\n // Convert h1s to h2s\n h1: 'h2',\n\n // Convert lazy-loaded noscript images to figures\n noscript: ($node, $) => {\n const $children = $.browser ? $($node.text()) : $node.children();\n if ($children.length === 1 && $children.get(0) !== undefined &&\n $children.get(0).tagName.toLowerCase() === 'img') {\n return 'figure';\n }\n\n return null;\n },\n },\n },\n\n title: {\n selectors: [\n 'h1.lede-feature-title',\n 'h1.headline-primary',\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n '.by-authors',\n '.lede-feature-author',\n ],\n },\n\n dek: {\n selectors: [\n '.lede-feature-teaser',\n ],\n },\n\n date_published: {\n selectors: [\n ['time.article-timestamp[datetime]', 'datetime'],\n 'time.article-timestamp',\n ],\n },\n};\n","export const WikipediaExtractor = {\n domain: 'wikipedia.org',\n content: {\n selectors: [\n '#mw-content-text',\n ],\n\n defaultCleaner: false,\n\n // transform top infobox to an image with caption\n transforms: {\n '.infobox img': ($node) => {\n const $parent = $node.parents('.infobox');\n // Only prepend the first image in .infobox\n if ($parent.children('img').length === 0) {\n $parent.prepend($node);\n }\n },\n '.infobox caption': 'figcaption',\n '.infobox': 
'figure',\n },\n\n // Selectors to remove from the extracted content\n clean: [\n '.mw-editsection',\n 'figure tr, figure td, figure tbody',\n '#toc',\n '.navbox',\n ],\n\n },\n\n author: 'Wikipedia Contributors',\n\n title: {\n selectors: [\n 'h2.title',\n ],\n },\n\n date_published: {\n selectors: [\n '#footer-info-lastmod',\n ],\n },\n\n};\n","export const TwitterExtractor = {\n domain: 'twitter.com',\n\n content: {\n transforms: {\n // We're transforming essentially the whole page here.\n // Twitter doesn't have nice selectors, so our initial\n // selector grabs the whole page, then we're re-writing\n // it to fit our needs before we clean it up.\n '.permalink[role=main]': ($node, $) => {\n const tweets = $node.find('.tweet');\n const $tweetContainer = $('');\n $tweetContainer.append(tweets);\n $node.replaceWith($tweetContainer);\n },\n\n // Twitter wraps @ with s, which\n // renders as a strikethrough\n s: 'span',\n },\n\n selectors: [\n '.permalink[role=main]',\n ],\n\n defaultCleaner: false,\n\n clean: [\n '.stream-item-footer',\n 'button',\n '.tweet-details-fixer',\n ],\n },\n\n author: {\n selectors: [\n '.tweet.permalink-tweet .username',\n ],\n },\n\n date_published: {\n selectors: [\n ['.permalink-tweet ._timestamp[data-time-ms]', 'data-time-ms'],\n // '.tweet.permalink-tweet .metadata',\n ],\n },\n\n};\n","export const NYTimesExtractor = {\n domain: 'www.nytimes.com',\n\n title: {\n selectors: [\n '.g-headline',\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n '.g-byline',\n '.byline',\n ],\n },\n\n content: {\n selectors: [\n 'div.g-blocks',\n 'article#story',\n ],\n\n transforms: {\n 'img.g-lazy': ($node) => {\n let src = $node.attr('src');\n // const widths = $node.attr('data-widths')\n // .slice(1)\n // .slice(0, -1)\n // .split(',');\n // if (widths.length) {\n // width = widths.slice(-1);\n // } else {\n // width = '900';\n // }\n const width = 640;\n\n src = src.replace('{{size}}', width);\n 
$node.attr('src', src);\n },\n },\n\n clean: [\n '.ad',\n 'header#story-header',\n '.story-body-1 .lede.video',\n '.visually-hidden',\n '#newsletter-promo',\n '.promo',\n '.comments-button',\n '.hidden',\n '.comments',\n ],\n },\n\n date_published: null,\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\nexport const TheAtlanticExtractor = {\n domain: 'www.theatlantic.com',\n title: {\n selectors: [\n 'h1.hed',\n ],\n },\n\n author: {\n selectors: [\n 'article#article .article-cover-extra .metadata .byline a',\n ],\n },\n\n content: {\n selectors: [\n '.article-body',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time[itemProp=\"datePublished\"]', 'datetime'],\n ],\n },\n\n lead_image_url: null,\n\n dek: null,\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const NewYorkerExtractor = {\n domain: 'www.newyorker.com',\n title: {\n selectors: [\n 'h1.title',\n ],\n },\n\n author: {\n selectors: [\n '.contributors',\n ],\n },\n\n content: {\n selectors: [\n 'div#articleBody',\n 'div.articleBody',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WiredExtractor = {\n domain: 'www.wired.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[rel=\"author\"]',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'article.content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.visually-hidden',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=\"datePublished\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const MSNExtractor = {\n domain: 'www.msn.com',\n title: {\n selectors: [\n 'h1',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.authorname-txt',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n 'div.richtext',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'span.caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n 'span.time',\n ],\n },\n\n lead_image_url: {\n selectors: [\n\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const YahooExtractor = {\n domain: 'www.yahoo.com',\n title: {\n selectors: [\n 'header.canvas-header',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'span.provider-name',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.content-canvas',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.figure-caption',\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time.date[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n // enter dek selectors\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BuzzfeedExtractor = {\n domain: 'www.buzzfeed.com',\n title: {\n selectors: [\n 'h1[id=\"post-title\"]',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n 'a[data-action=\"user/username\"]', 'byline__author',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '#buzz_sub_buzz',\n // enter content selectors\n ],\n\n defaultCleaner: false,\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n h2: 'b',\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n '.instapaper_ignore',\n '.suplist_list_hide .buzz_superlist_item .buzz_superlist_number_inline',\n '.share-box',\n ],\n },\n\n date_published: {\n selectors: [\n '.buzz-datetime',\n // enter author selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const WikiaExtractor = {\n domain: 'fandom.wikia.com',\n title: {\n selectors: [\n 'h1.entry-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n '.author vcard', '.fn',\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n '.grid-content',\n '.entry-content',\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[name=\"article:published_time\"]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const LittleThingsExtractor = {\n domain: 'www.littlethings.com',\n title: {\n selectors: [\n 'h1.post-title',\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n // enter author selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.mainContentIntro',\n '.content-wrapper',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const PoliticoExtractor = {\n domain: 'www.politico.com',\n title: {\n selectors: [\n // enter title selectors\n ['meta[name=\"og:title\"]', 'value'],\n ],\n },\n\n author: {\n selectors: [\n '.story-main-content .byline .vcard',\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n '.story-main-content',\n '.content-group', '.story-core',\n '.story-text',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: [\n ],\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n 'figcaption',\n ],\n },\n\n date_published: {\n selectors: [\n ['.story-main-content .timestamp time[datetime]', 'datetime'],\n\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter lead_image_url selectors\n ['meta[name=\"og:image\"]', 'value'],\n\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"description\"]', 'value'],\n ],\n },\n\n next_page_url: null,\n\n excerpt: null,\n};\n","export const DeadspinExtractor = {\n domain: 'deadspin.com',\n\n supportedDomains: [\n 'jezebel.com',\n 'lifehacker.com',\n 'kotaku.com',\n 'gizmodo.com',\n 'jalopnik.com',\n 'kinja.com',\n ],\n\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.author',\n ],\n },\n\n content: {\n selectors: [\n '.post-content',\n '.entry-content',\n ],\n\n // Is there anything in the content you selected that needs 
transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n 'iframe.lazyload[data-recommend-id^=\"youtube://\"]': ($node) => {\n const youtubeId = $node.attr('id').split('youtube-')[1];\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n ],\n },\n\n date_published: {\n selectors: [\n ['time.updated[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const BroadwayWorldExtractor = {\n domain: 'www.broadwayworld.com',\n title: {\n selectors: [\n 'h1.article-title',\n ],\n },\n\n author: {\n selectors: [\n 'span[itemprop=author]',\n ],\n },\n\n content: {\n selectors: [\n 'div[itemprop=articlebody]',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['meta[itemprop=datePublished]', 'value'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=\"og:description\"]', 'value'],\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","// Rename CustomExtractor\n// to fit your publication\n// (e.g., NYTimesExtractor)\nexport const ApartmentTherapyExtractor = {\n domain: 'www.apartmenttherapy.com',\n title: {\n selectors: [\n 'h1.headline',\n ],\n },\n\n author: {\n selectors: [\n '.PostByline__name',\n ],\n },\n\n content: {\n selectors: [\n 'div.post__content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n 'div[data-render-react-id=\"images/LazyPicture\"]': ($node, $) => {\n const data = JSON.parse($node.attr('data-props'));\n const { src } = data.sources[0];\n const $img = $('').attr('src', src);\n $node.replaceWith($img);\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['.PostByline__timestamp[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n ['meta[name=description]', 'value'],\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","export const MediumExtractor = {\n domain: 'medium.com',\n\n supportedDomains: [\n 'trackchanges.postlight.com',\n ],\n\n title: {\n selectors: [\n 'h1',\n ],\n },\n\n author: {\n selectors: [\n ['meta[name=\"author\"]', 'value'],\n ],\n },\n\n content: {\n selectors: [\n '.section-content',\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? 
E.g., unusual lazy loaded images\n transforms: {\n // Re-write lazy-loaded youtube videos\n iframe: ($node) => {\n const ytRe =\n /https:\\/\\/i.embed.ly\\/.+url=https:\\/\\/i\\.ytimg\\.com\\/vi\\/(\\w+)\\//;\n const thumb = decodeURIComponent($node.attr('data-thumbnail'));\n\n if (ytRe.test(thumb)) {\n const [_, youtubeId] = thumb.match(ytRe) // eslint-disable-line\n $node.attr('src', `https://www.youtube.com/embed/${youtubeId}`);\n const $parent = $node.parents('figure');\n $parent.prepend($node.clone());\n $node.remove();\n }\n },\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ],\n },\n\n date_published: {\n selectors: [\n ['time[datetime]', 'datetime'],\n ],\n },\n\n lead_image_url: {\n selectors: [\n ['meta[name=\"og:image\"]', 'value'],\n ],\n },\n\n dek: {\n selectors: [\n // enter selectors\n ],\n },\n\n next_page_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n excerpt: {\n selectors: [\n // enter selectors\n ],\n },\n};\n","import mergeSupportedDomains from 'utils/merge-supported-domains';\nimport * as CustomExtractors from './custom/index';\n\nexport default Object.keys(CustomExtractors).reduce((acc, key) => {\n const extractor = CustomExtractors[key];\n return {\n ...acc,\n ...mergeSupportedDomains(extractor),\n };\n}, {});\n","// CLEAN AUTHOR CONSTANTS\nexport const CLEAN_AUTHOR_RE = /^\\s*(posted |written )?by\\s*:?\\s*(.*)/i;\n // author = re.sub(r'^\\s*(posted |written )?by\\s*:?\\s*(.*)(?i)',\n\n// CLEAN DEK CONSTANTS\nexport const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');\n// An ordered list of meta tag names that denote likely article deks.\n// From most distinct to least distinct.\n//\n// NOTE: There are currently no meta tags that seem to provide the right\n// content consistenty enough. 
Two options were:\n// - og:description\n// - dc.description\n// However, these tags often have SEO-specific junk in them that's not\n// header-worthy like a dek is. Excerpt material at best.\nexport const DEK_META_TAGS = [\n];\n\n// An ordered list of Selectors to find likely article deks. From\n// most explicit to least explicit.\n//\n// Should be more restrictive than not, as a failed dek can be pretty\n// detrimental to the aesthetics of an article.\nexport const DEK_SELECTORS = [\n '.entry-summary',\n];\n\n// CLEAN DATE PUBLISHED CONSTANTS\nexport const MS_DATE_STRING = /^\\d{13}$/i;\nexport const SEC_DATE_STRING = /^\\d{10}$/i;\nexport const CLEAN_DATE_STRING_RE = /^\\s*published\\s*:?\\s*(.*)/i;\nexport const TIME_MERIDIAN_SPACE_RE = /(.*\\d)(am|pm)(.*)/i;\nexport const TIME_MERIDIAN_DOTS_RE = /\\.m\\./i;\nconst months = [\n 'jan',\n 'feb',\n 'mar',\n 'apr',\n 'may',\n 'jun',\n 'jul',\n 'aug',\n 'sep',\n 'oct',\n 'nov',\n 'dec',\n];\nconst allMonths = months.join('|');\nconst timestamp1 = '[0-9]{1,2}:[0-9]{2,2}( ?[ap].?m.?)?';\nconst timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';\nexport const SPLIT_DATE_STRING =\n new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');\n\n// CLEAN TITLE CONSTANTS\n// A regular expression that will match separating characters on a\n// title, that usually denote breadcrumbs or something similar.\nexport const TITLE_SPLITTERS_RE = /(: | - | \\| )/g;\n\nexport const DOMAIN_ENDINGS_RE =\n new RegExp('.com$|.net$|.org$|.co.uk$', 'g');\n","import { CLEAN_AUTHOR_RE } from './constants';\n\n// Take an author string (like 'By David Smith ') and clean it to\n// just the name(s): 'David Smith'.\nexport default function cleanAuthor(author) {\n return author.replace(CLEAN_AUTHOR_RE, '$2').trim();\n}\n","import validUrl from 'valid-url';\n\nexport default function clean(leadImageUrl) {\n leadImageUrl = leadImageUrl.trim();\n if (validUrl.isWebUri(leadImageUrl)) {\n return leadImageUrl;\n }\n\n return 
null;\n}\n","import { stripTags } from 'utils/dom';\nimport { excerptContent } from 'utils/text';\n\nimport { TEXT_LINK_RE } from './constants';\n\n// Take a dek HTML fragment, and return the cleaned version of it.\n// Return None if the dek wasn't good enough.\nexport default function cleanDek(dek, { $, excerpt }) {\n // Sanity check that we didn't get too short or long of a dek.\n if (dek.length > 1000 || dek.length < 5) return null;\n\n // Check that dek isn't the same as excerpt\n if (excerpt && excerptContent(excerpt, 10) === excerptContent(dek, 10)) return null;\n\n const dekText = stripTags(dek, $);\n\n // Plain text links shouldn't exist in the dek. If we have some, it's\n // not a good dek - bail.\n if (TEXT_LINK_RE.test(dekText)) return null;\n\n return dekText.trim();\n}\n","import moment from 'moment';\n// Is there a compelling reason to use moment here?\n// Mostly only being used for the isValid() method,\n// but could just check for 'Invalid Date' string.\n\nimport {\n MS_DATE_STRING,\n SEC_DATE_STRING,\n CLEAN_DATE_STRING_RE,\n SPLIT_DATE_STRING,\n TIME_MERIDIAN_SPACE_RE,\n TIME_MERIDIAN_DOTS_RE,\n} from './constants';\n\nexport function cleanDateString(dateString) {\n return (dateString.match(SPLIT_DATE_STRING) || [])\n .join(' ')\n .replace(TIME_MERIDIAN_DOTS_RE, 'm')\n .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')\n .replace(CLEAN_DATE_STRING_RE, '$1')\n .trim();\n}\n\n// Take a date published string, and hopefully return a date out of\n// it. Return none if we fail.\nexport default function cleanDatePublished(dateString) {\n // If string is in milliseconds or seconds, convert to int\n if (MS_DATE_STRING.test(dateString) || SEC_DATE_STRING.test(dateString)) {\n dateString = parseInt(dateString, 10);\n }\n\n let date = moment(new Date(dateString));\n\n if (!date.isValid()) {\n dateString = cleanDateString(dateString);\n date = moment(new Date(dateString));\n }\n\n return date.isValid() ? 
date.toISOString() : null;\n}\n","import {\n cleanAttributes,\n cleanHeaders,\n cleanHOnes,\n cleanImages,\n cleanTags,\n removeEmpty,\n rewriteTopLevel,\n markToKeep,\n stripJunkTags,\n makeLinksAbsolute,\n} from 'utils/dom';\n\n// Clean our article content, returning a new, cleaned node.\nexport default function extractCleanNode(\n article,\n {\n $,\n cleanConditionally = true,\n title = '',\n url = '',\n defaultCleaner = true,\n }\n) {\n // Rewrite the tag name to div if it's a top level node like body or\n // html to avoid later complications with multiple body tags.\n rewriteTopLevel(article, $);\n\n // Drop small images and spacer images\n // Only do this is defaultCleaner is set to true;\n // this can sometimes be too aggressive.\n if (defaultCleaner) cleanImages(article, $);\n\n // Mark elements to keep that would normally be removed.\n // E.g., stripJunkTags will remove iframes, so we're going to mark\n // YouTube/Vimeo videos as elements we want to keep.\n markToKeep(article, $, url);\n\n // Drop certain tags like , etc\n // This is -mostly- for cleanliness, not security.\n stripJunkTags(article, $);\n\n // H1 tags are typically the article title, which should be extracted\n // by the title extractor instead. If there's less than 3 of them (<3),\n // strip them. Otherwise, turn 'em into H2s.\n cleanHOnes(article, $);\n\n // Clean headers\n cleanHeaders(article, $, title);\n\n // Make links absolute\n makeLinksAbsolute(article, $, url);\n\n // We used to clean UL's and OL's here, but it was leading to\n // too many in-article lists being removed. 
Consider a better\n // way to detect menus particularly and remove them.\n // Also optionally running, since it can be overly aggressive.\n if (defaultCleaner) cleanTags(article, $, cleanConditionally);\n\n // Remove empty paragraph nodes\n removeEmpty(article, $);\n\n // Remove unnecessary attributes\n cleanAttributes(article, $);\n\n return article;\n}\n","import { stripTags } from 'utils/dom';\n\nimport { TITLE_SPLITTERS_RE } from './constants';\nimport { resolveSplitTitle } from './index';\n\nexport default function cleanTitle(title, { url, $ }) {\n // If title has |, :, or - in it, see if\n // we can clean it up.\n if (TITLE_SPLITTERS_RE.test(title)) {\n title = resolveSplitTitle(title, url);\n }\n\n // Final sanity check that we didn't get a crazy title.\n // if (title.length > 150 || title.length < 15) {\n if (title.length > 150) {\n // If we did, return h1 from the document if it exists\n const h1 = $('h1');\n if (h1.length === 1) {\n title = h1.text();\n }\n }\n\n // strip any html tags in the title text\n return stripTags(title, $).trim();\n}\n","import URL from 'url';\nimport wuzzy from 'wuzzy';\n\nimport {\n TITLE_SPLITTERS_RE,\n DOMAIN_ENDINGS_RE,\n} from './constants';\n\nfunction extractBreadcrumbTitle(splitTitle, text) {\n // This must be a very breadcrumbed title, like:\n // The Best Gadgets on Earth : Bits : Blogs : NYTimes.com\n // NYTimes - Blogs - Bits - The Best Gadgets on Earth\n if (splitTitle.length >= 6) {\n // Look to see if we can find a breadcrumb splitter that happens\n // more than once. If we can, we'll be able to better pull out\n // the title.\n const termCounts = splitTitle.reduce((acc, titleText) => {\n acc[titleText] = acc[titleText] ? 
acc[titleText] + 1 : 1;\n return acc;\n }, {});\n\n const [maxTerm, termCount] =\n Reflect.ownKeys(termCounts)\n .reduce((acc, key) => {\n if (acc[1] < termCounts[key]) {\n return [key, termCounts[key]];\n }\n\n return acc;\n }, [0, 0]);\n\n // We found a splitter that was used more than once, so it\n // is probably the breadcrumber. Split our title on that instead.\n // Note: max_term should be <= 4 characters, so that \" >> \"\n // will match, but nothing longer than that.\n if (termCount >= 2 && maxTerm.length <= 4) {\n splitTitle = text.split(maxTerm);\n }\n\n const splitEnds = [splitTitle[0], splitTitle.slice(-1)];\n const longestEnd = splitEnds.reduce((acc, end) => acc.length > end.length ? acc : end, '');\n\n if (longestEnd.length > 10) {\n return longestEnd;\n }\n\n return text;\n }\n\n return null;\n}\n\nfunction cleanDomainFromTitle(splitTitle, url) {\n // Search the ends of the title, looking for bits that fuzzy match\n // the URL too closely. If one is found, discard it and return the\n // rest.\n //\n // Strip out the big TLDs - it just makes the matching a bit more\n // accurate. 
Not the end of the world if it doesn't strip right.\n const { host } = URL.parse(url);\n const nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');\n\n const startSlug = splitTitle[0].toLowerCase().replace(' ', '');\n const startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);\n\n if (startSlugRatio > 0.4 && startSlug.length > 5) {\n return splitTitle.slice(2).join('');\n }\n\n const endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');\n const endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);\n\n if (endSlugRatio > 0.4 && endSlug.length >= 5) {\n return splitTitle.slice(0, -2).join('');\n }\n\n return null;\n}\n\n// Given a title with separators in it (colons, dashes, etc),\n// resolve whether any of the segments should be removed.\nexport default function resolveSplitTitle(title, url = '') {\n // Splits while preserving splitters, like:\n // ['The New New York', ' - ', 'The Washington Post']\n const splitTitle = title.split(TITLE_SPLITTERS_RE);\n if (splitTitle.length === 1) {\n return title;\n }\n\n let newTitle = extractBreadcrumbTitle(splitTitle, title);\n if (newTitle) return newTitle;\n\n newTitle = cleanDomainFromTitle(splitTitle, url);\n if (newTitle) return newTitle;\n\n // Fuzzy ratio didn't find anything, so this title is probably legit.\n // Just return it all.\n return title;\n}\n","import cleanAuthor from './author';\nimport cleanImage from './lead-image-url';\nimport cleanDek from './dek';\nimport cleanDatePublished from './date-published';\nimport cleanContent from './content';\nimport cleanTitle from './title';\n\nconst Cleaners = {\n author: cleanAuthor,\n lead_image_url: cleanImage,\n dek: cleanDek,\n date_published: cleanDatePublished,\n content: cleanContent,\n title: cleanTitle,\n};\n\nexport default Cleaners;\n\nexport { cleanAuthor };\nexport { cleanImage };\nexport { cleanDek };\nexport { cleanDatePublished };\nexport { cleanContent };\nexport { cleanTitle };\nexport { default as resolveSplitTitle } from 
'./resolve-split-title';\n","import {\n stripUnlikelyCandidates,\n convertToParagraphs,\n} from 'utils/dom';\n\nimport {\n scoreContent,\n findTopCandidate,\n} from './scoring';\n\n// Using a variety of scoring techniques, extract the content most\n// likely to be article text.\n//\n// If strip_unlikely_candidates is True, remove any elements that\n// match certain criteria first. (Like, does this element have a\n// classname of \"comment\")\n//\n// If weight_nodes is True, use classNames and IDs to determine the\n// worthiness of nodes.\n//\n// Returns a cheerio object $\nexport default function extractBestNode($, opts) {\n // clone the node so we can get back to our\n // initial parsed state if needed\n // TODO Do I need this? – AP\n // let $root = $.root().clone()\n\n if (opts.stripUnlikelyCandidates) {\n $ = stripUnlikelyCandidates($);\n }\n\n $ = convertToParagraphs($);\n $ = scoreContent($, opts.weightNodes);\n const $topCandidate = findTopCandidate($);\n\n return $topCandidate;\n}\n","import cheerio from 'cheerio';\n\nimport { nodeIsSufficient } from 'utils/dom';\nimport { cleanContent } from 'cleaners';\nimport { normalizeSpaces } from 'utils/text';\n\nimport extractBestNode from './extract-best-node';\n\nconst GenericContentExtractor = {\n defaultOpts: {\n stripUnlikelyCandidates: true,\n weightNodes: true,\n cleanConditionally: true,\n },\n\n // Extract the content for this resource - initially, pass in our\n // most restrictive opts which will return the highest quality\n // content. On each failure, retry with slightly more lax opts.\n //\n // :param return_type: string. If \"node\", should return the content\n // as a cheerio node rather than as an HTML string.\n //\n // Opts:\n // stripUnlikelyCandidates: Remove any elements that match\n // non-article-like criteria first.(Like, does this element\n // have a classname of \"comment\")\n //\n // weightNodes: Modify an elements score based on whether it has\n // certain classNames or IDs. 
Examples: Subtract if a node has\n // a className of 'comment', Add if a node has an ID of\n // 'entry-content'.\n //\n // cleanConditionally: Clean the node to return of some\n // superfluous content. Things like forms, ads, etc.\n extract({ $, html, title, url }, opts) {\n opts = { ...this.defaultOpts, ...opts };\n\n $ = $ || cheerio.load(html);\n\n // Cascade through our extraction-specific opts in an ordered fashion,\n // turning them off as we try to extract content.\n let node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n return this.cleanAndReturnNode(node, $);\n }\n\n // We didn't succeed on first pass, one by one disable our\n // extraction opts and try again.\n for (const key of Reflect.ownKeys(opts).filter(k => opts[k] === true)) {\n opts[key] = false;\n $ = cheerio.load(html);\n\n node = this.getContentNode($, title, url, opts);\n\n if (nodeIsSufficient(node)) {\n break;\n }\n }\n\n return this.cleanAndReturnNode(node, $);\n },\n\n // Get node given current options\n getContentNode($, title, url, opts) {\n return cleanContent(\n extractBestNode($, opts),\n {\n $,\n cleanConditionally: opts.cleanConditionally,\n title,\n url,\n });\n },\n\n // Once we got here, either we're at our last-resort node, or\n // we broke early. Make sure we at least have -something- before we\n // move forward.\n cleanAndReturnNode(node, $) {\n if (!node) {\n return null;\n }\n\n return normalizeSpaces($.html(node));\n\n // if return_type == \"html\":\n // return normalize_spaces(node_to_html(node))\n // else:\n // return node\n },\n\n};\n\nexport default GenericContentExtractor;\n","// TODO: It would be great if we could merge the meta and selector lists into\n// a list of objects, because we could then rank them better. For example,\n// .hentry .entry-title is far better suited than .\n\n// An ordered list of meta tag names that denote likely article titles. All\n// attributes should be lowercase for faster case-insensitive matching. 
From\n// most distinct to least distinct.\nexport const STRONG_TITLE_META_TAGS = [\n 'tweetmeme-title',\n 'dc.title',\n 'rbtitle',\n 'headline',\n 'title',\n];\n\n// og:title is weak because it typically contains context that we don't like,\n// for example the source site's name. Gotta get that brand into facebook!\nexport const WEAK_TITLE_META_TAGS = [\n 'og:title',\n];\n\n// An ordered list of XPath Selectors to find likely article titles. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. The\n// speed tradeoff is worth the accuracy hit.\nexport const STRONG_TITLE_SELECTORS = [\n '.hentry .entry-title',\n 'h1#articleHeader',\n 'h1.articleHeader',\n 'h1.article',\n '.instapaper_title',\n '#meebo-title',\n];\n\nexport const WEAK_TITLE_SELECTORS = [\n 'article h1',\n '#entry-title',\n '.entry-title',\n '#entryTitle',\n '#entrytitle',\n '.entryTitle',\n '.entrytitle',\n '#articleTitle',\n '.articleTitle',\n 'post post-title',\n 'h1.title',\n 'h2.article',\n 'h1',\n 'html head title',\n 'title',\n];\n","import { cleanTitle } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n STRONG_TITLE_META_TAGS,\n WEAK_TITLE_META_TAGS,\n STRONG_TITLE_SELECTORS,\n WEAK_TITLE_SELECTORS,\n} from './constants';\n\nconst GenericTitleExtractor = {\n extract({ $, url, metaCache }) {\n // First, check to see if we have a matching meta tag that we can make\n // use of that is strongly associated with the headline.\n let title;\n\n title = extractFromMeta($, STRONG_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Second, look through our content selectors for the most likely\n // article title that is strongly associated with the headline.\n title = extractFromSelectors($, 
STRONG_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // Third, check for weaker meta tags that may match.\n title = extractFromMeta($, WEAK_TITLE_META_TAGS, metaCache);\n if (title) return cleanTitle(title, { url, $ });\n\n // Last, look for weaker selector tags that may match.\n title = extractFromSelectors($, WEAK_TITLE_SELECTORS);\n if (title) return cleanTitle(title, { url, $ });\n\n // If no matches, return an empty string\n return '';\n },\n};\n\nexport default GenericTitleExtractor;\n","// An ordered list of meta tag names that denote likely article authors. All\n// attributes should be lowercase for faster case-insensitive matching. From\n// most distinct to least distinct.\n//\n// Note: \"author\" is too often the -developer- of the page, so it is not\n// added here.\nexport const AUTHOR_META_TAGS = [\n 'byl',\n 'clmst',\n 'dc.author',\n 'dcsext.author',\n 'dc.creator',\n 'rbauthors',\n 'authors',\n];\n\nexport const AUTHOR_MAX_LENGTH = 300;\n\n// An ordered list of XPath Selectors to find likely article authors. From\n// most explicit to least explicit.\n//\n// Note - this does not use classes like CSS. This checks to see if the string\n// exists in the className, which is not as accurate as .className (which\n// splits on spaces/endlines), but for our purposes it's close enough. 
The\n// speed tradeoff is worth the accuracy hit.\nexport const AUTHOR_SELECTORS = [\n '.entry .entry-author',\n '.author.vcard .fn',\n '.author .vcard .fn',\n '.byline.vcard .fn',\n '.byline .vcard .fn',\n '.byline .by .author',\n '.byline .by',\n '.byline .author',\n '.post-author.vcard',\n '.post-author .vcard',\n 'a[rel=author]',\n '#by_author',\n '.by_author',\n '#entryAuthor',\n '.entryAuthor',\n '.byline a[href*=author]',\n '#author .authorname',\n '.author .authorname',\n '#author',\n '.author',\n '.articleauthor',\n '.ArticleAuthor',\n '.byline',\n];\n\n// An ordered list of Selectors to find likely article authors, with\n// regular expression for content.\nconst bylineRe = /^[\\n\\s]*By/i;\nexport const BYLINE_SELECTORS_RE = [\n ['#byline', bylineRe],\n ['.byline', bylineRe],\n];\n","import { cleanAuthor } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\n\nimport {\n AUTHOR_META_TAGS,\n AUTHOR_MAX_LENGTH,\n AUTHOR_SELECTORS,\n BYLINE_SELECTORS_RE,\n} from './constants';\n\nconst GenericAuthorExtractor = {\n extract({ $, metaCache }) {\n let author;\n\n // First, check to see if we have a matching\n // meta tag that we can make use of.\n author = extractFromMeta($, AUTHOR_META_TAGS, metaCache);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Second, look through our selectors looking for potential authors.\n author = extractFromSelectors($, AUTHOR_SELECTORS, 2);\n if (author && author.length < AUTHOR_MAX_LENGTH) {\n return cleanAuthor(author);\n }\n\n // Last, use our looser regular-expression based selectors for\n // potential authors.\n for (const [selector, regex] of BYLINE_SELECTORS_RE) {\n const node = $(selector);\n if (node.length === 1) {\n const text = node.text();\n if (regex.test(text)) {\n return cleanAuthor(text);\n }\n }\n }\n\n return null;\n },\n};\n\nexport default GenericAuthorExtractor;\n","// An ordered list of meta tag names that denote\n// likely 
date published dates. All attributes\n// should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const DATE_PUBLISHED_META_TAGS = [\n 'article:published_time',\n 'displaydate',\n 'dc.date',\n 'dc.date.issued',\n 'rbpubdate',\n 'publish_date',\n 'pub_date',\n 'pagedate',\n 'pubdate',\n 'revision_date',\n 'doc_date',\n 'date_created',\n 'content_create_date',\n 'lastmodified',\n 'created',\n 'date',\n];\n\n// An ordered list of XPath Selectors to find\n// likely date published dates. From most explicit\n// to least explicit.\nexport const DATE_PUBLISHED_SELECTORS = [\n '.hentry .dtstamp.published',\n '.hentry .published',\n '.hentry .dtstamp.updated',\n '.hentry .updated',\n '.single .published',\n '.meta .published',\n '.meta .postDate',\n '.entry-date',\n '.byline .date',\n '.postmetadata .date',\n '.article_datetime',\n '.date-header',\n '.story-date',\n '.dateStamp',\n '#story .datetime',\n '.dateline',\n '.pubdate',\n];\n\n// An ordered list of compiled regular expressions to find likely date\n// published dates from the URL. 
These should always have the first\n// reference be a date string that is parseable by dateutil.parser.parse\nconst abbrevMonthsStr = '(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)';\nexport const DATE_PUBLISHED_URL_RES = [\n // /2012/01/27/ but not /2012/01/293\n new RegExp('/(20\\\\d{2}/\\\\d{2}/\\\\d{2})/', 'i'),\n // 20120127 or 20120127T but not 2012012733 or 8201201733\n // /[^0-9](20\\d{2}[01]\\d[0-3]\\d)([^0-9]|$)/i,\n // 2012-01-27\n new RegExp('(20\\\\d{2}-[01]\\\\d-[0-3]\\\\d)', 'i'),\n // /2012/jan/27/\n new RegExp(`/(20\\\\d{2}/${abbrevMonthsStr}/[0-3]\\\\d)/`, 'i'),\n];\n","import { cleanDatePublished } from 'cleaners';\nimport {\n extractFromMeta,\n extractFromSelectors,\n} from 'utils/dom';\nimport { extractFromUrl } from 'utils/text';\n\nimport {\n DATE_PUBLISHED_META_TAGS,\n DATE_PUBLISHED_SELECTORS,\n DATE_PUBLISHED_URL_RES,\n} from './constants';\n\nconst GenericDatePublishedExtractor = {\n extract({ $, url, metaCache }) {\n let datePublished;\n // First, check to see if we have a matching meta tag\n // that we can make use of.\n // Don't try cleaning tags from this string\n datePublished = extractFromMeta($, DATE_PUBLISHED_META_TAGS, metaCache, false);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Second, look through our selectors looking for potential\n // date_published's.\n datePublished = extractFromSelectors($, DATE_PUBLISHED_SELECTORS);\n if (datePublished) return cleanDatePublished(datePublished);\n\n // Lastly, look to see if a dately string exists in the URL\n datePublished = extractFromUrl(url, DATE_PUBLISHED_URL_RES);\n if (datePublished) return cleanDatePublished(datePublished);\n\n return null;\n },\n};\n\nexport default GenericDatePublishedExtractor;\n","// import {\n// DEK_META_TAGS,\n// DEK_SELECTORS,\n// DEK_URL_RES,\n// } from './constants';\n\n// import { cleanDek } from 'cleaners';\n\n// import {\n// extractFromMeta,\n// extractFromSelectors,\n// } from 'utils/dom';\n\n// Currently there is only 
one selector for\n// deks. We should simply return null here\n// until we have a more robust generic option.\n// Below is the original source for this, for reference.\nconst GenericDekExtractor = {\n // extract({ $, content, metaCache }) {\n extract() {\n return null;\n },\n};\n\nexport default GenericDekExtractor;\n\n// def extract_dek(self):\n// # First, check to see if we have a matching meta tag that we can make\n// # use of.\n// dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)\n// if not dek:\n// # Second, look through our CSS/XPath selectors. This may return\n// # an HTML fragment.\n// dek = self.extract_from_selectors('dek',\n// constants.DEK_SELECTORS,\n// text_only=False)\n//\n// if dek:\n// # Make sure our dek isn't in the first few thousand characters\n// # of the content, otherwise it's just the start of the article\n// # and not a true dek.\n// content = self.extract_content()\n// content_chunk = normalize_spaces(strip_tags(content[:2000]))\n// dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.\n//\n// # 80% or greater similarity means the dek was very similar to some\n// # of the starting content, so we skip it.\n// if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:\n// return dek\n//\n// return None\n","// An ordered list of meta tag names that denote likely article leading images.\n// All attributes should be lowercase for faster case-insensitive matching.\n// From most distinct to least distinct.\nexport const LEAD_IMAGE_URL_META_TAGS = [\n 'og:image',\n 'twitter:image',\n 'image_src',\n];\n\nexport const LEAD_IMAGE_URL_SELECTORS = [\n 'link[rel=image_src]',\n];\n\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS = [\n 'upload',\n 'wp-content',\n 'large',\n 'photo',\n 'wp-image',\n];\nexport const POSITIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(POSITIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS = [\n 'spacer',\n 'sprite',\n 'blank',\n 'throbber',\n 'gradient',\n 'tile',\n 'bg',\n 
'background',\n 'icon',\n 'social',\n 'header',\n 'hdr',\n 'advert',\n 'spinner',\n 'loader',\n 'loading',\n 'default',\n 'rating',\n 'share',\n 'facebook',\n 'twitter',\n 'theme',\n 'promo',\n 'ads',\n 'wp-includes',\n];\nexport const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i');\n\nexport const GIF_RE = /\\.gif(\\?.*)?$/i;\nexport const JPG_RE = /\\.jpe?g(\\?.*)?$/i;\n","import {\n POSITIVE_LEAD_IMAGE_URL_HINTS_RE,\n NEGATIVE_LEAD_IMAGE_URL_HINTS_RE,\n GIF_RE,\n JPG_RE,\n} from './constants';\n\nimport { PHOTO_HINTS_RE } from '../content/scoring/constants';\n\nfunction getSig($node) {\n return `${$node.attr('class') || ''} ${$node.attr('id') || ''}`;\n}\n\n// Scores image urls based on a variety of heuristics.\nexport function scoreImageUrl(url) {\n url = url.trim();\n let score = 0;\n\n if (POSITIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score += 20;\n }\n\n if (NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.test(url)) {\n score -= 20;\n }\n\n // TODO: We might want to consider removing this as\n // gifs are much more common/popular than they once were\n if (GIF_RE.test(url)) {\n score -= 10;\n }\n\n if (JPG_RE.test(url)) {\n score += 10;\n }\n\n // PNGs are neutral.\n\n return score;\n}\n\n// Alt attribute usually means non-presentational image.\nexport function scoreAttr($img) {\n if ($img.attr('alt')) {\n return 5;\n }\n\n return 0;\n}\n\n// Look through our parent and grandparent for figure-like\n// container elements, give a bonus if we find them\nexport function scoreByParents($img) {\n let score = 0;\n const $figParent = $img.parents('figure').first();\n\n if ($figParent.length === 1) {\n score += 25;\n }\n\n const $parent = $img.parent();\n let $gParent;\n if ($parent.length === 1) {\n $gParent = $parent.parent();\n }\n\n [$parent, $gParent].forEach(($node) => {\n if (PHOTO_HINTS_RE.test(getSig($node))) {\n score += 15;\n }\n });\n\n return score;\n}\n\n// Look at our immediate sibling and see if it looks like it's a\n// 
caption. Bonus if so.\nexport function scoreBySibling($img) {\n let score = 0;\n const $sibling = $img.next();\n const sibling = $sibling.get(0);\n\n if (sibling && sibling.tagName.toLowerCase() === 'figcaption') {\n score += 25;\n }\n\n if (PHOTO_HINTS_RE.test(getSig($sibling))) {\n score += 15;\n }\n\n return score;\n}\n\nexport function scoreByDimensions($img) {\n let score = 0;\n\n const width = parseFloat($img.attr('width'));\n const height = parseFloat($img.attr('height'));\n const src = $img.attr('src');\n\n // Penalty for skinny images\n if (width && width <= 50) {\n score -= 50;\n }\n\n // Penalty for short images\n if (height && height <= 50) {\n score -= 50;\n }\n\n if (width && height && !src.includes('sprite')) {\n const area = width * height;\n if (area < 5000) { // Smaller than 50 x 100\n score -= 100;\n } else {\n score += Math.round(area / 1000);\n }\n }\n\n return score;\n}\n\nexport function scoreByPosition($imgs, index) {\n return ($imgs.length / 2) - index;\n}\n","import { extractFromMeta } from 'utils/dom';\nimport { cleanImage } from 'cleaners';\n\nimport {\n LEAD_IMAGE_URL_META_TAGS,\n LEAD_IMAGE_URL_SELECTORS,\n} from './constants';\n\nimport {\n scoreImageUrl,\n scoreAttr,\n scoreByParents,\n scoreBySibling,\n scoreByDimensions,\n scoreByPosition,\n} from './score-image';\n\n// Given a resource, try to find the lead image URL from within\n// it. Like content and next page extraction, uses a scoring system\n// to determine what the most likely image may be. 
Short circuits\n// on really probable things like og:image meta tags.\n//\n// Potential signals to still take advantage of:\n// * domain\n// * weird aspect ratio\nconst GenericLeadImageUrlExtractor = {\n extract({ $, content, metaCache, html }) {\n let cleanUrl;\n if (!$.browser && $('head').length === 0) {\n $('*').first().prepend(html);\n }\n\n // Check to see if we have a matching meta tag that we can make use of.\n // Moving this higher because common practice is now to use large\n // images on things like Open Graph or Twitter cards.\n // images usually have for things like Open Graph.\n const imageUrl =\n extractFromMeta(\n $,\n LEAD_IMAGE_URL_META_TAGS,\n metaCache,\n false\n );\n\n if (imageUrl) {\n cleanUrl = cleanImage(imageUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // Next, try to find the \"best\" image via the content.\n // We'd rather not have to fetch each image and check dimensions,\n // so try to do some analysis and determine them instead.\n const $content = $(content);\n const imgs = $('img', $content).toArray();\n const imgScores = {};\n\n imgs.forEach((img, index) => {\n const $img = $(img);\n const src = $img.attr('src');\n\n if (!src) return;\n\n let score = scoreImageUrl(src);\n score += scoreAttr($img);\n score += scoreByParents($img);\n score += scoreBySibling($img);\n score += scoreByDimensions($img);\n score += scoreByPosition(imgs, index);\n\n imgScores[src] = score;\n });\n\n const [topUrl, topScore] =\n Reflect.ownKeys(imgScores).reduce((acc, key) =>\n imgScores[key] > acc[1] ? 
[key, imgScores[key]] : acc\n , [null, 0]);\n\n if (topScore > 0) {\n cleanUrl = cleanImage(topUrl);\n\n if (cleanUrl) return cleanUrl;\n }\n\n // If nothing else worked, check to see if there are any really\n // probable nodes in the doc, like .\n for (const selector of LEAD_IMAGE_URL_SELECTORS) {\n const $node = $(selector).first();\n const src = $node.attr('src');\n if (src) {\n cleanUrl = cleanImage(src);\n if (cleanUrl) return cleanUrl;\n }\n\n const href = $node.attr('href');\n if (href) {\n cleanUrl = cleanImage(href);\n if (cleanUrl) return cleanUrl;\n }\n\n const value = $node.attr('value');\n if (value) {\n cleanUrl = cleanImage(value);\n if (cleanUrl) return cleanUrl;\n }\n }\n\n return null;\n },\n};\n\nexport default GenericLeadImageUrlExtractor;\n\n// def extract(self):\n// \"\"\"\n// # First, try to find the \"best\" image via the content.\n// # We'd rather not have to fetch each image and check dimensions,\n// # so try to do some analysis and determine them instead.\n// content = self.extractor.extract_content(return_type=\"node\")\n// imgs = content.xpath('.//img')\n// img_scores = defaultdict(int)\n// logger.debug('Scoring %d images from content', len(imgs))\n// for (i, img) in enumerate(imgs):\n// img_score = 0\n//\n// if not 'src' in img.attrib:\n// logger.debug('No src attribute found')\n// continue\n//\n// try:\n// parsed_img = urlparse(img.attrib['src'])\n// img_path = parsed_img.path.lower()\n// except ValueError:\n// logger.debug('ValueError getting img path.')\n// continue\n// logger.debug('Image path is %s', img_path)\n//\n// if constants.POSITIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Positive URL hints match. Adding 20.')\n// img_score += 20\n//\n// if constants.NEGATIVE_LEAD_IMAGE_URL_HINTS_RE.match(img_path):\n// logger.debug('Negative URL hints match. Subtracting 20.')\n// img_score -= 20\n//\n// # Gifs are more often structure than photos\n// if img_path.endswith('gif'):\n// logger.debug('gif found. 
Subtracting 10.')\n// img_score -= 10\n//\n// # JPGs are more often photographs\n// if img_path.endswith('jpg'):\n// logger.debug('jpg found. Adding 10.')\n// img_score += 10\n//\n// # PNGs are neutral.\n//\n// # Alt attribute usually means non-presentational image.\n// if 'alt' in img.attrib and len(img.attrib['alt']) > 5:\n// logger.debug('alt attribute found. Adding 5.')\n// img_score += 5\n//\n// # Look through our parent and grandparent for figure-like\n// # container elements, give a bonus if we find them\n// parents = [img.getparent()]\n// if parents[0] is not None and parents[0].getparent() is not None:\n// parents.append(parents[0].getparent())\n// for p in parents:\n// if p.tag == 'figure':\n// logger.debug('Parent with
tag found. Adding 25.')\n// img_score += 25\n//\n// p_sig = ' '.join([p.get('id', ''), p.get('class', '')])\n// if constants.PHOTO_HINTS_RE.search(p_sig):\n// logger.debug('Photo hints regex match. Adding 15.')\n// img_score += 15\n//\n// # Look at our immediate sibling and see if it looks like it's a\n// # caption. Bonus if so.\n// sibling = img.getnext()\n// if sibling is not None:\n// if sibling.tag == 'figcaption':\n// img_score += 25\n//\n// sib_sig = ' '.join([sibling.get('id', ''),\n// sibling.get('class', '')]).lower()\n// if 'caption' in sib_sig:\n// img_score += 15\n//\n// # Pull out width/height if they were set.\n// img_width = None\n// img_height = None\n// if 'width' in img.attrib:\n// try:\n// img_width = float(img.get('width'))\n// except ValueError:\n// pass\n// if 'height' in img.attrib:\n// try:\n// img_height = float(img.get('height'))\n// except ValueError:\n// pass\n//\n// # Penalty for skinny images\n// if img_width and img_width <= 50:\n// logger.debug('Skinny image found. Subtracting 50.')\n// img_score -= 50\n//\n// # Penalty for short images\n// if img_height and img_height <= 50:\n// # Wide, short images are more common than narrow, tall ones\n// logger.debug('Short image found. Subtracting 25.')\n// img_score -= 25\n//\n// if img_width and img_height and not 'sprite' in img_path:\n// area = img_width * img_height\n//\n// if area < 5000: # Smaller than 50x100\n// logger.debug('Image with small area found. Subtracting 100.')\n// img_score -= 100\n// else:\n// img_score += round(area/1000.0)\n//\n// # If the image is higher on the page than other images,\n// # it gets a bonus. 
Penalty if lower.\n// logger.debug('Adding page placement bonus of %d.', len(imgs)/2 - i)\n// img_score += len(imgs)/2 - i\n//\n// # Use the raw src here because we munged img_path for case\n// # insensitivity\n// logger.debug('Final score is %d.', img_score)\n// img_scores[img.attrib['src']] += img_score\n//\n// top_score = 0\n// top_url = None\n// for (url, score) in img_scores.items():\n// if score > top_score:\n// top_url = url\n// top_score = score\n//\n// if top_score > 0:\n// logger.debug('Using top score image from content. Score was %d', top_score)\n// return top_url\n//\n//\n// # If nothing else worked, check to see if there are any really\n// # probable nodes in the doc, like .\n// logger.debug('Trying to find lead image in probable nodes')\n// for selector in constants.LEAD_IMAGE_URL_SELECTORS:\n// nodes = self.resource.extract_by_selector(selector)\n// for node in nodes:\n// clean_value = None\n// if node.attrib.get('src'):\n// clean_value = self.clean(node.attrib['src'])\n//\n// if not clean_value and node.attrib.get('href'):\n// clean_value = self.clean(node.attrib['href'])\n//\n// if not clean_value and node.attrib.get('value'):\n// clean_value = self.clean(node.attrib['value'])\n//\n// if clean_value:\n// logger.debug('Found lead image in probable nodes.')\n// logger.debug('Node was: %s', node)\n// return clean_value\n//\n// return None\n","import difflib from 'difflib';\n\nexport default function scoreSimilarity(score, articleUrl, href) {\n // Do this last and only if we have a real candidate, because it's\n // potentially expensive computationally. Compare the link to this\n // URL using difflib to get the % similarity of these URLs. 
On a\n // sliding scale, subtract points from this link based on\n // similarity.\n if (score > 0) {\n const similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();\n // Subtract .1 from diff_percent when calculating modifier,\n // which means that if it's less than 10% different, we give a\n // bonus instead. Ex:\n // 3% different = +17.5 points\n // 10% different = 0 points\n // 20% different = -25 points\n const diffPercent = 1.0 - similarity;\n const diffModifier = -(250 * (diffPercent - 0.2));\n return score + diffModifier;\n }\n\n return 0;\n}\n","import { IS_DIGIT_RE } from 'utils/text/constants';\n\nexport default function scoreLinkText(linkText, pageNum) {\n // If the link text can be parsed as a number, give it a minor\n // bonus, with a slight bias towards lower numbered pages. This is\n // so that pages that might not have 'next' in their text can still\n // get scored, and sorted properly by score.\n let score = 0;\n\n if (IS_DIGIT_RE.test(linkText.trim())) {\n const linkTextAsNum = parseInt(linkText, 10);\n // If it's the first page, we already got it on the first call.\n // Give it a negative score. Otherwise, up to page 10, give a\n // small bonus.\n if (linkTextAsNum < 2) {\n score = -30;\n } else {\n score = Math.max(0, 10 - linkTextAsNum);\n }\n\n // If it appears that the current page number is greater than\n // this links page number, it's a very bad sign. Give it a big\n // penalty.\n if (pageNum && pageNum >= linkTextAsNum) {\n score -= 50;\n }\n }\n\n return score;\n}\n","export default function scorePageInLink(pageNum, isWp) {\n // page in the link = bonus. 
Intentionally ignore wordpress because\n // their ?p=123 link style gets caught by this even though it means\n // separate documents entirely.\n if (pageNum && !isWp) {\n return 50;\n }\n\n return 0;\n}\n","export const DIGIT_RE = /\\d/;\n\n// A list of words that, if found in link text or URLs, likely mean that\n// this link is not a next page link.\nexport const EXTRANEOUS_LINK_HINTS = [\n 'print',\n 'archive',\n 'comment',\n 'discuss',\n 'e-mail',\n 'email',\n 'share',\n 'reply',\n 'all',\n 'login',\n 'sign',\n 'single',\n 'adx',\n 'entry-unrelated',\n];\nexport const EXTRANEOUS_LINK_HINTS_RE = new RegExp(EXTRANEOUS_LINK_HINTS.join('|'), 'i');\n\n// Match any link text/classname/id that looks like it could mean the next\n// page. Things like: next, continue, >, >>, » but not >|, »| as those can\n// mean last page.\nexport const NEXT_LINK_TEXT_RE = new RegExp('(next|weiter|continue|>([^|]|$)|»([^|]|$))', 'i');\n\n// Match any link text/classname/id that looks like it is an end link: things\n// like \"first\", \"last\", \"end\", etc.\nexport const CAP_LINK_TEXT_RE = new RegExp('(first|last|end)', 'i');\n\n// Match any link text/classname/id that looks like it means the previous\n// page.\nexport const PREV_LINK_TEXT_RE = new RegExp('(prev|earl|old|new|<|«)', 'i');\n\n// Match any phrase that looks like it could be page, or paging, or pagination\nexport const PAGE_RE = new RegExp('pag(e|ing|inat)', 'i');\n","import { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nexport default function scoreExtraneousLinks(href) {\n // If the URL itself contains extraneous values, give a penalty.\n if (EXTRANEOUS_LINK_HINTS_RE.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { range } from 'utils';\nimport {\n NEGATIVE_SCORE_RE,\n POSITIVE_SCORE_RE,\n PAGE_RE,\n} from 'utils/dom/constants';\nimport { EXTRANEOUS_LINK_HINTS_RE } from '../constants';\n\nfunction makeSig($link) {\n return `${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default 
function scoreByParents($link) {\n // If a parent node contains paging-like classname or id, give a\n // bonus. Additionally, if a parent_node contains bad content\n // (like 'sponsor'), give a penalty.\n let $parent = $link.parent();\n let positiveMatch = false;\n let negativeMatch = false;\n let score = 0;\n\n Array.from(range(0, 4)).forEach(() => {\n if ($parent.length === 0) {\n return;\n }\n\n const parentData = makeSig($parent, ' ');\n\n // If we have 'page' or 'paging' in our data, that's a good\n // sign. Add a bonus.\n if (!positiveMatch && PAGE_RE.test(parentData)) {\n positiveMatch = true;\n score += 25;\n }\n\n // If we have 'comment' or something in our data, and\n // we don't have something like 'content' as well, that's\n // a bad sign. Give a penalty.\n if (!negativeMatch && NEGATIVE_SCORE_RE.test(parentData)\n && EXTRANEOUS_LINK_HINTS_RE.test(parentData)) {\n if (!POSITIVE_SCORE_RE.test(parentData)) {\n negativeMatch = true;\n score -= 25;\n }\n }\n\n $parent = $parent.parent();\n });\n\n return score;\n}\n","import { PREV_LINK_TEXT_RE } from '../constants';\n\nexport default function scorePrevLink(linkData) {\n // If the link has something like \"previous\", its definitely\n // an old link, skip it.\n if (PREV_LINK_TEXT_RE.test(linkData)) {\n return -200;\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n DIGIT_RE,\n EXTRANEOUS_LINK_HINTS_RE,\n} from '../constants';\n\nexport default function shouldScore(\n href,\n articleUrl,\n baseUrl,\n parsedUrl,\n linkText,\n previousUrls\n) {\n // skip if we've already fetched this url\n if (previousUrls.find(url => href === url) !== undefined) {\n return false;\n }\n\n // If we've already parsed this URL, or the URL matches the base\n // URL, or is empty, skip it.\n if (!href || href === articleUrl || href === baseUrl) {\n return false;\n }\n\n const { hostname } = parsedUrl;\n const { hostname: linkHost } = URL.parse(href);\n\n // Domain mismatch.\n if (linkHost !== hostname) {\n return false;\n 
}\n\n // If href doesn't contain a digit after removing the base URL,\n // it's certainly not the next page.\n const fragment = href.replace(baseUrl, '');\n if (!DIGIT_RE.test(fragment)) {\n return false;\n }\n\n // This link has extraneous content (like \"comment\") in its link\n // text, so we skip it.\n if (EXTRANEOUS_LINK_HINTS_RE.test(linkText)) {\n return false;\n }\n\n // Next page link text is never long, skip if it is too long.\n if (linkText.length > 25) {\n return false;\n }\n\n return true;\n}\n","export default function scoreBaseUrl(href, baseRegex) {\n // If the baseUrl isn't part of this URL, penalize this\n // link. It could still be the link, but the odds are lower.\n // Example:\n // http://www.actionscript.org/resources/articles/745/1/JavaScript-and-VBScript-Injection-in-ActionScript-3/Page1.html\n if (!baseRegex.test(href)) {\n return -25;\n }\n\n return 0;\n}\n","import { NEXT_LINK_TEXT_RE } from '../constants';\n\nexport default function scoreNextLinkText(linkData) {\n // Things like \"next\", \">>\", etc.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return 50;\n }\n\n return 0;\n}\n","import {\n NEXT_LINK_TEXT_RE,\n CAP_LINK_TEXT_RE,\n} from '../constants';\n\nexport default function scoreCapLinks(linkData) {\n // Cap links are links like \"last\", etc.\n if (CAP_LINK_TEXT_RE.test(linkData)) {\n // If we found a link like \"last\", but we've already seen that\n // this link is also \"next\", it's fine. 
If it's not been\n // previously marked as \"next\", then it's probably bad.\n // Penalize.\n if (NEXT_LINK_TEXT_RE.test(linkData)) {\n return -65;\n }\n }\n\n return 0;\n}\n","import URL from 'url';\n\nimport {\n getAttrs,\n isWordpress,\n} from 'utils/dom';\nimport {\n removeAnchor,\n pageNumFromUrl,\n} from 'utils/text';\n\nimport {\n scoreSimilarity,\n scoreLinkText,\n scorePageInLink,\n scoreExtraneousLinks,\n scoreByParents,\n scorePrevLink,\n shouldScore,\n scoreBaseUrl,\n scoreCapLinks,\n scoreNextLinkText,\n} from './utils';\n\nexport function makeBaseRegex(baseUrl) {\n return new RegExp(`^${baseUrl}`, 'i');\n}\n\nfunction makeSig($link, linkText) {\n return `${linkText || $link.text()} ${$link.attr('class') || ''} ${$link.attr('id') || ''}`;\n}\n\nexport default function scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls = [],\n}) {\n parsedUrl = parsedUrl || URL.parse(articleUrl);\n const baseRegex = makeBaseRegex(baseUrl);\n const isWp = isWordpress($);\n\n // Loop through all links, looking for hints that they may be next-page\n // links. 
Things like having \"page\" in their textContent, className or\n // id, or being a child of a node with a page-y className or id.\n //\n // After we do that, assign each page a score, and pick the one that\n // looks most like the next page link, as long as its score is strong\n // enough to have decent confidence.\n const scoredPages = links.reduce((possiblePages, link) => {\n // Remove any anchor data since we don't do a good job\n // standardizing URLs (it's hard), we're going to do\n // some checking with and without a trailing slash\n const attrs = getAttrs(link);\n\n // if href is undefined, return\n if (!attrs.href) return possiblePages;\n\n const href = removeAnchor(attrs.href);\n const $link = $(link);\n const linkText = $link.text();\n\n if (!shouldScore(href, articleUrl, baseUrl, parsedUrl, linkText, previousUrls)) {\n return possiblePages;\n }\n\n // ## PASSED THE FIRST-PASS TESTS. Start scoring. ##\n if (!possiblePages[href]) {\n possiblePages[href] = {\n score: 0,\n linkText,\n href,\n };\n } else {\n possiblePages[href].linkText = `${possiblePages[href].linkText}|${linkText}`;\n }\n\n const possiblePage = possiblePages[href];\n const linkData = makeSig($link, linkText);\n const pageNum = pageNumFromUrl(href);\n\n let score = scoreBaseUrl(href, baseRegex);\n score += scoreNextLinkText(linkData);\n score += scoreCapLinks(linkData);\n score += scorePrevLink(linkData);\n score += scoreByParents($link);\n score += scoreExtraneousLinks(href);\n score += scorePageInLink(pageNum, isWp);\n score += scoreLinkText(linkText, pageNum);\n score += scoreSimilarity(score, articleUrl, href);\n\n possiblePage.score = score;\n\n return possiblePages;\n }, {});\n\n return Reflect.ownKeys(scoredPages).length === 0 ? 
null : scoredPages;\n}\n","import URL from 'url';\n\nimport {\n articleBaseUrl,\n removeAnchor,\n} from 'utils/text';\nimport scoreLinks from './scoring/score-links';\n\n// Looks for and returns next page url\n// for multi-page articles\nconst GenericNextPageUrlExtractor = {\n extract({ $, url, parsedUrl, previousUrls = [] }) {\n parsedUrl = parsedUrl || URL.parse(url);\n\n const articleUrl = removeAnchor(url);\n const baseUrl = articleBaseUrl(url, parsedUrl);\n\n const links = $('a[href]').toArray();\n\n const scoredLinks = scoreLinks({\n links,\n articleUrl,\n baseUrl,\n parsedUrl,\n $,\n previousUrls,\n });\n\n // If no links were scored, return null\n if (!scoredLinks) return null;\n\n // now that we've scored all possible pages,\n // find the biggest one.\n const topPage = Reflect.ownKeys(scoredLinks).reduce((acc, link) => {\n const scoredLink = scoredLinks[link];\n return scoredLink.score > acc.score ? scoredLink : acc;\n }, { score: -100 });\n\n // If the score is less than 50, we're not confident enough to use it,\n // so we fail.\n if (topPage.score >= 50) {\n return topPage.href;\n }\n\n return null;\n },\n};\n\nexport default GenericNextPageUrlExtractor;\n","export const CANONICAL_META_SELECTORS = [\n 'og:url',\n];\n","import URL from 'url';\nimport { extractFromMeta } from 'utils/dom';\n\nimport { CANONICAL_META_SELECTORS } from './constants';\n\nfunction parseDomain(url) {\n const parsedUrl = URL.parse(url);\n const { hostname } = parsedUrl;\n return hostname;\n}\n\nfunction result(url) {\n return {\n url,\n domain: parseDomain(url),\n };\n}\n\nconst GenericUrlExtractor = {\n extract({ $, url, metaCache }) {\n const $canonical = $('link[rel=canonical]');\n if ($canonical.length !== 0) {\n const href = $canonical.attr('href');\n if (href) {\n return result(href);\n }\n }\n\n const metaUrl = extractFromMeta($, CANONICAL_META_SELECTORS, metaCache);\n if (metaUrl) {\n return result(metaUrl);\n }\n\n return result(url);\n },\n\n};\n\nexport default 
GenericUrlExtractor;\n","export const EXCERPT_META_SELECTORS = [\n 'og:description',\n 'twitter:description',\n];\n","import ellipsize from 'ellipsize';\n\nimport {\n extractFromMeta,\n stripTags,\n} from 'utils/dom';\n\nimport { EXCERPT_META_SELECTORS } from './constants';\n\nexport function clean(content, $, maxLength = 200) {\n content = content.replace(/[\\s\\n]+/g, ' ').trim();\n return ellipsize(content, maxLength, { ellipse: '…' });\n}\n\nconst GenericExcerptExtractor = {\n extract({ $, content, metaCache }) {\n const excerpt = extractFromMeta($, EXCERPT_META_SELECTORS, metaCache);\n if (excerpt) {\n return clean(stripTags(excerpt, $));\n }\n // Fall back to excerpting from the extracted content\n const maxLength = 200;\n const shortContent = content.slice(0, maxLength * 5);\n return clean($(shortContent).text(), $, maxLength);\n },\n};\n\nexport default GenericExcerptExtractor;\n","import cheerio from 'cheerio';\n\nimport { normalizeSpaces } from 'utils/text';\n\nconst GenericWordCountExtractor = {\n extract({ content }) {\n const $ = cheerio.load(content);\n const $content = $('div').first();\n\n const text = normalizeSpaces($content.text());\n return text.split(/\\s/).length;\n },\n};\n\nexport default GenericWordCountExtractor;\n","import cheerio from 'cheerio';\nimport stringDirection from 'string-direction';\n\nimport GenericContentExtractor from './content/extractor';\nimport GenericTitleExtractor from './title/extractor';\nimport GenericAuthorExtractor from './author/extractor';\nimport GenericDatePublishedExtractor from './date-published/extractor';\nimport GenericDekExtractor from './dek/extractor';\nimport GenericLeadImageUrlExtractor from './lead-image-url/extractor';\nimport GenericNextPageUrlExtractor from './next-page-url/extractor';\nimport GenericUrlExtractor from './url/extractor';\nimport GenericExcerptExtractor from './excerpt/extractor';\nimport GenericWordCountExtractor from './word-count/extractor';\n\nconst GenericExtractor = {\n // 
This extractor is the default for all domains\n domain: '*',\n title: GenericTitleExtractor.extract,\n date_published: GenericDatePublishedExtractor.extract,\n author: GenericAuthorExtractor.extract,\n content: GenericContentExtractor.extract.bind(GenericContentExtractor),\n lead_image_url: GenericLeadImageUrlExtractor.extract,\n dek: GenericDekExtractor.extract,\n next_page_url: GenericNextPageUrlExtractor.extract,\n url_and_domain: GenericUrlExtractor.extract,\n excerpt: GenericExcerptExtractor.extract,\n word_count: GenericWordCountExtractor.extract,\n direction: ({ title }) => stringDirection.getDirection(title),\n\n extract(options) {\n const { html, $ } = options;\n\n if (html && !$) {\n const loaded = cheerio.load(html);\n options.$ = loaded;\n }\n\n const title = this.title(options);\n const date_published = this.date_published(options);\n const author = this.author(options);\n const content = this.content({ ...options, title });\n const lead_image_url = this.lead_image_url({ ...options, content });\n const dek = this.dek({ ...options, content });\n const next_page_url = this.next_page_url(options);\n const excerpt = this.excerpt({ ...options, content });\n const word_count = this.word_count({ ...options, content });\n const direction = this.direction({ title });\n const { url, domain } = this.url_and_domain(options);\n\n return {\n title,\n author,\n date_published: date_published || null,\n dek,\n lead_image_url,\n content,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default GenericExtractor;\n","import URL from 'url';\n\nimport Extractors from './all';\nimport GenericExtractor from './generic';\n\nexport default function getExtractor(url, parsedUrl) {\n parsedUrl = parsedUrl || URL.parse(url);\n const { hostname } = parsedUrl;\n const baseDomain = hostname.split('.').slice(-2).join('.');\n\n return Extractors[hostname] || Extractors[baseDomain] || GenericExtractor;\n}\n","import Cleaners from 
'cleaners';\nimport { convertNodeTo } from 'utils/dom';\nimport GenericExtractor from './generic';\n\n// Remove elements by an array of selectors\nexport function cleanBySelectors($content, $, { clean }) {\n if (!clean) return $content;\n\n $(clean.join(','), $content).remove();\n\n return $content;\n}\n\n// Transform matching elements\nexport function transformElements($content, $, { transforms }) {\n if (!transforms) return $content;\n\n Reflect.ownKeys(transforms).forEach((key) => {\n const $matches = $(key, $content);\n const value = transforms[key];\n\n // If value is a string, convert directly\n if (typeof value === 'string') {\n $matches.each((index, node) => {\n convertNodeTo($(node), $, transforms[key]);\n });\n } else if (typeof value === 'function') {\n // If value is function, apply function to node\n $matches.each((index, node) => {\n const result = value($(node), $);\n // If function returns a string, convert node to that value\n if (typeof result === 'string') {\n convertNodeTo($(node), $, result);\n }\n });\n }\n });\n\n return $content;\n}\n\nfunction findMatchingSelector($, selectors) {\n return selectors.find((selector) => {\n if (Array.isArray(selector)) {\n const [s, attr] = selector;\n return $(s).length === 1 && $(s).attr(attr) && $(s).attr(attr).trim() !== '';\n }\n\n return $(selector).length === 1 && $(selector).text().trim() !== '';\n });\n}\n\nexport function select(opts) {\n const { $, type, extractionOpts, extractHtml = false } = opts;\n // Skip if there's not extraction for this type\n if (!extractionOpts) return null;\n\n // If a string is hardcoded for a type (e.g., Wikipedia\n // contributors), return the string\n if (typeof extractionOpts === 'string') return extractionOpts;\n\n const { selectors, defaultCleaner = true } = extractionOpts;\n\n const matchingSelector = findMatchingSelector($, selectors);\n\n if (!matchingSelector) return null;\n\n // Declaring result; will contain either\n // text or html, which will be cleaned\n // 
by the appropriate cleaner type\n\n // If the selector type requests html as its return type\n // transform and clean the element with provided selectors\n if (extractHtml) {\n let $content = $(matchingSelector);\n\n // Wrap in div so transformation can take place on root element\n $content.wrap($(''));\n $content = $content.parent();\n\n $content = transformElements($content, $, extractionOpts);\n $content = cleanBySelectors($content, $, extractionOpts);\n\n $content = Cleaners[type]($content, { ...opts, defaultCleaner });\n\n return $.html($content);\n }\n\n let result;\n\n // if selector is an array (e.g., ['img', 'src']),\n // extract the attr\n if (Array.isArray(matchingSelector)) {\n const [selector, attr] = matchingSelector;\n result = $(selector).attr(attr).trim();\n } else {\n result = $(matchingSelector).text().trim();\n }\n\n // Allow custom extractor to skip default cleaner\n // for this type; defaults to true\n if (defaultCleaner) {\n return Cleaners[type](result, opts);\n }\n\n return result;\n}\n\nfunction extractResult(opts) {\n const { type, extractor, fallback = true } = opts;\n\n const result = select({ ...opts, extractionOpts: extractor[type] });\n\n // If custom parser succeeds, return the result\n if (result) {\n return result;\n }\n\n // If nothing matches the selector, and fallback is enabled,\n // run the Generic extraction\n if (fallback) return GenericExtractor[type](opts);\n\n return null;\n}\n\nconst RootExtractor = {\n extract(extractor = GenericExtractor, opts) {\n const { contentOnly, extractedTitle } = opts;\n // This is the generic extractor. 
Run its extract method\n if (extractor.domain === '*') return extractor.extract(opts);\n\n opts = {\n ...opts,\n extractor,\n };\n\n if (contentOnly) {\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title: extractedTitle,\n });\n return {\n content,\n };\n }\n const title = extractResult({ ...opts, type: 'title' });\n const date_published = extractResult({ ...opts, type: 'date_published' });\n const author = extractResult({ ...opts, type: 'author' });\n const next_page_url = extractResult({ ...opts, type: 'next_page_url' });\n const content = extractResult({\n ...opts, type: 'content', extractHtml: true, title,\n });\n const lead_image_url = extractResult({ ...opts, type: 'lead_image_url', content });\n const excerpt = extractResult({ ...opts, type: 'excerpt', content });\n const dek = extractResult({ ...opts, type: 'dek', content, excerpt });\n const word_count = extractResult({ ...opts, type: 'word_count', content });\n const direction = extractResult({ ...opts, type: 'direction', title });\n const { url, domain } =\n extractResult({ ...opts, type: 'url_and_domain' }) || { url: null, domain: null };\n\n return {\n title,\n content,\n author,\n date_published,\n lead_image_url,\n dek,\n next_page_url,\n url,\n domain,\n excerpt,\n word_count,\n direction,\n };\n },\n};\n\nexport default RootExtractor;\n","import { removeAnchor } from 'utils/text';\nimport RootExtractor from 'extractors/root-extractor';\nimport GenericExtractor from 'extractors/generic';\nimport Resource from 'resource';\n\nexport default async function collectAllPages(\n {\n next_page_url,\n html,\n $,\n metaCache,\n result,\n Extractor,\n title,\n url,\n }\n) {\n // At this point, we've fetched just the first page\n let pages = 1;\n const previousUrls = [removeAnchor(url)];\n\n // If we've gone over 26 pages, something has\n // likely gone wrong.\n while (next_page_url && pages < 26) {\n pages += 1;\n $ = await Resource.create(next_page_url);\n html = 
$.html();\n\n const extractorOpts = {\n url: next_page_url,\n html,\n $,\n metaCache,\n contentOnly: true,\n extractedTitle: title,\n previousUrls,\n };\n\n const nextPageResult = RootExtractor.extract(Extractor, extractorOpts);\n\n previousUrls.push(next_page_url);\n result = {\n ...result,\n content: `${result.content}