@ -1081,7 +1082,7 @@ var SENTENCE_END_RE$1=new RegExp('.( |$)');function hasSentenceEnd$1(text){retur
// used in our fetchResource function to
// ensure correctly encoded responses
functiongetEncoding$1(str){if(ENCODING_RE$1.test(str)){returnENCODING_RE$1.exec(str)[1];}returnnull;}// Browser does not like us setting user agent
varREQUEST_HEADERS=cheerio$1.browser?{}:{'User-Agent':'Mercury - https://mercury.postlight.com/web-parser/'};// The number of milliseconds to attempt to fetch a resource before timing out.
varREQUEST_HEADERS=cheerio$1.browser?{}:{'User-Agent':'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36'};// The number of milliseconds to attempt to fetch a resource before timing out.
varFETCH_TIMEOUT=10000;// Content types that we do not extract content from
varBAD_CONTENT_TYPES=['audio/mpeg','image/gif','image/jpeg','image/jpg'];varBAD_CONTENT_TYPES_RE=newRegExp('^('+BAD_CONTENT_TYPES.join('|')+')$','i');// Use this setting as the maximum size an article can be
// for us to attempt parsing. Defaults to 5 MB.
@ -1123,7 +1124,7 @@ var SPACER_RE$1=new RegExp('transparent|spacer|blank','i');// The class we will
// but would normally remove
varKEEP_CLASS$1='mercury-parser-keep';varKEEP_SELECTORS$1=['iframe[src^="https://www.youtube.com"]','iframe[src^="http://www.youtube.com"]','iframe[src^="https://player.vimeo"]','iframe[src^="http://player.vimeo"]'];// A list of tags to strip from the output if we encounter them.
@ -1466,7 +1467,7 @@ function convertLazyLoadedImages($){$('img').each(function(_,img){var attrs=getA
// string.
create:functioncreate(url,preparedResponse,parsedUrl){var_this=this;return_asyncToGenerator(_regeneratorRuntime.mark(function_callee(){varresult,validResponse;return_regeneratorRuntime.wrap(function_callee$(_context){while(1){switch(_context.prev=_context.next){case0:result=void0;if(!preparedResponse){_context.next=6;break;}validResponse={statusMessage:'OK',statusCode:200,headers:{'content-type':'text/html','content-length':500}};result={body:preparedResponse,response:validResponse};_context.next=9;break;case6:_context.next=8;returnfetchResource$1(url,parsedUrl);case8:result=_context.sent;case9:if(!result.error){_context.next=12;break;}result.failed=true;return_context.abrupt('return',result);case12:return_context.abrupt('return',_this.generateDoc(result));case13:case'end':return_context.stop();}}},_callee,_this);}))();},generateDoc:functiongenerateDoc(_ref){varcontent=_ref.body,response=_ref.response;varcontentType=response.headers['content-type'];// TODO: Implement is_text function from
if(!contentType.includes('html')&&!contentType.includes('text')){thrownewError('Content does not appear to be text.');}var$=cheerio$1.load(content,{normalizeWhitespace:true});if($.root().children().length===0){thrownewError('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return$;}};varmerge=functionmerge(extractor,domains){returndomains.reduce(function(acc,domain){acc[domain]=extractor;returnacc;},{});};functionmergeSupportedDomains(extractor){returnextractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}varBloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
if(!contentType.includes('html')&&!contentType.includes('text')){thrownewError('Content does not appear to be text.');}var$=cheerio$1.load(content);if($.root().children().length===0){thrownewError('No children, likely a bad parse.');}$=normalizeMetaTags($);$=convertLazyLoadedImages($);$=clean($);return$;}};varmerge=functionmerge(extractor,domains){returndomains.reduce(function(acc,domain){acc[domain]=extractor;returnacc;},{});};functionmergeSupportedDomains(extractor){returnextractor.supportedDomains?merge(extractor,[extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))):merge(extractor,[extractor.domain]);}varBloggerExtractor={domain:'blogspot.com',content:{// Blogger is insane and does not load its content
// initially in the page, but it's all there
// in noscript
selectors:['.post-content noscript'],// Selectors to remove from the extracted content
@ -1522,7 +1523,7 @@ var WiredExtractor={domain:'www.wired.com',title:{selectors:['h1.post-title']},a
transforms:[],// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
varMSNExtractor={domain:'www.msn.com',title:{selectors:['h1']},author:{selectors:['span.authorname-txt']},content:{selectors:['div.richtext'],// Is there anything in the content you selected that needs transformed
clean:[]},date_published:{selectors:[['.PostByline__timestamp[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},dek:{selectors:[]},next_page_url:{selectors:[// enter selectors
]},excerpt:{selectors:[// enter selectors
]}};varMediumExtractor={domain:'medium.com',supportedDomains:['trackchanges.postlight.com'],title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:['.section-content'],// Is there anything in the content you selected that needs transformed
]}};varMediumExtractor={domain:'medium.com',supportedDomains:['trackchanges.postlight.com'],title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},content:{selectors:['.section-content','article > div > section'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
clean:[]}};varWwwYoutubeComExtractor={domain:'www.youtube.com',title:{selectors:['.watch-title','h1.watch-title-container']},author:{selectors:['.yt-user-info']},date_published:{selectors:[['meta[itemProp="datePublished"]','value']],timezone:'GMT'},dek:{selectors:[// enter selectors
]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{defaultCleaner:false,selectors:[['#player-api','#eow-description']],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'#player-api':functionplayerApi($node,$){varvideoId=$('meta[itemProp="videoId"]').attr('value');$node.html('\n <iframe src="https://www.youtube.com/embed/'+videoId+'" frameborder="0" allowfullscreen></iframe>');}},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwTheguardianComExtractor={domain:'www.theguardian.com',title:{selectors:['.content__headline']},author:{selectors:['p.byline']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['.content__standfirst']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.content__article-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.hide-on-mobile','.inline-icon']}};varWwwSbnationComExtractor={domain:'www.sbnation.com',title:{selectors:['h1.c-page-title']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['h2.c-entry-summary.p-dek']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.c-entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwBloombergComExtractor={domain:'www.bloomberg.com',title:{selectors:[// normal articles
'.body-copy'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.inline-newsletter','.page-ad']}};varWwwBustleComExtractor={domain:'www.bustle.com',title:{selectors:['h1.post-page__title']},author:{selectors:['div.content-meta__author']},date_published:{selectors:[['time.content-meta__published-date[datetime]','datetime']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.post-page__body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwNprOrgExtractor={domain:'www.npr.org',title:{selectors:['h1','.storytitle']},author:{selectors:['p.byline__name.byline__name--block']},date_published:{selectors:[['.dateblock time[datetime]','datetime'],['meta[name="date"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value'],['meta[name="twitter:image:src"]','value']]},content:{selectors:['.storytext'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'.bucketwrap.image':'figure','.bucketwrap.image .credit-caption':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['div.enlarge_measure']}};varWwwRecodeNetExtractor={domain:'www.recode.net',title:{selectors:['h1.c-page-title']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['h2.c-entry-summary.p-dek']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['figure.e-image--hero','.c-entry-content'],'.c-entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varQzComExtractor={domain:'qz.com',title:{selectors:['header.item-header.content-width-responsive']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:['.timestamp']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['figure.featured-image','.item-body'],'.item-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.article-aside','.progressive-image-thumbnail']}};varWwwDmagazineComExtractor={domain:'www.dmagazine.com',title:{selectors:['h1.story__title']},author:{selectors:['.story__info .story__info__item:first-child']},date_published:{selectors:[// enter selectors
'.story__info'],timezone:'America/Chicago'},dek:{selectors:['.story__subhead']},lead_image_url:{selectors:[['article figure a:first-child','href']]},content:{selectors:['.story__content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwReutersComExtractor={domain:'www.reuters.com',title:{selectors:['h1.article-headline']},author:{selectors:['.author']},date_published:{selectors:[['meta[name="og:article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['#article-text'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'.article-subtitle':'h4'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['#article-byline .author']}};varMashableComExtractor={domain:'mashable.com',title:{selectors:['h1.title']},author:{selectors:['span.author_name a']},date_published:{selectors:[['meta[name="og:article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['section.article-content.blueprint'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'.image-credit':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwChicagotribuneComExtractor={domain:'www.chicagotribune.com',title:{selectors:['h1.trb_ar_hl_t']},author:{selectors:['span.trb_ar_by_nm_au']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.trb_ar_page'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwVoxComExtractor={domain:'www.vox.com',title:{selectors:['h1.c-page-title']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['.p-dek']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['figure.e-image--hero','.c-entry-content'],'.c-entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'figure .e-image__image noscript':functionfigureEImage__imageNoscript($node){varimgHtml=$node.html();$node.parents('.e-image__image').find('.c-dynamic-image').replaceWith(imgHtml);},'figure .e-image__meta':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwCnbcComExtractor={domain:'www.cnbc.com',title:{selectors:['h1.title']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div#article_body.content','div.story'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwPopsugarComExtractor={domain:'www.popsugar.com',title:{selectors:['h2.post-title','title-text']},author:{selectors:[['meta[name="article:author"]','value']]},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['#content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.share-copy-title','.post-tags','.reactions']}};varObserverComExtractor={domain:'observer.com',title:{selectors:['h1.entry-title']},author:{selectors:['.author','.vcard']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},dek:{selectors:['h2.dek']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varPeopleComExtractor={domain:'people.com',title:{selectors:[['meta[name="og:title"]','value']]},author:{selectors:['a.author.url.fn']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.article-body__inner'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwUsmagazineComExtractor={domain:'www.usmagazine.com',title:{selectors:['header h1']},author:{selectors:['a.article-byline.tracked-offpage']},date_published:{timezone:'America/New_York',selectors:['time.article-published-date']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.article-body-inner'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.module-related']}};varWwwRollingstoneComExtractor={domain:'www.rollingstone.com',title:{selectors:['h1.content-title']},author:{selectors:['a.content-author.tracked-offpage']},date_published:{selectors:['time.content-published-date'],timezone:'America/New_York'},dek:{selectors:['.content-description']},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['.lead-container','.article-content'],'.article-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.module-related']}};vartwofortysevensportsComExtractor={domain:'247sports.com',title:{selectors:['title','article header h1']},author:{selectors:['.author']},date_published:{selectors:[['time[data-published]','data-published']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['section.body.article'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varUproxxComExtractor={domain:'uproxx.com',title:{selectors:['div.post-top h1']},author:{selectors:['.post-top .authorname']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.post-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'div.image':'figure','div.image .wp-media-credit':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwEonlineComExtractor={domain:'www.eonline.com',title:{selectors:['h1.article__title']},author:{selectors:['.entry-meta__author a']},date_published:{selectors:[['meta[itemprop="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['.post-content section, .post-content div.post-content__image']],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'div.post-content__image':'figure','div.post-content__image .image__credits':'figcaption'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwMiamiheraldComExtractor={domain:'www.miamiherald.com',title:{selectors:['h1.title']},date_published:{selectors:['p.published-date'],timezone:'America/New_York'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.dateline-storybody'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwRefinery29ComExtractor={domain:'www.refinery29.com',title:{selectors:['h1.title']},author:{selectors:['.contributor']},date_published:{selectors:[['meta[name="sailthru.date"]','value']],timezone:'America/New_York'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['.full-width-opener','.article-content'],'.article-content','.body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{'div.loading noscript':functiondivLoadingNoscript($node){varimgHtml=$node.html();$node.parents('.loading').replaceWith(imgHtml);},'.section-image':'figure','.section-image .content-caption':'figcaption','.section-text':'p'},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.story-share']}};varHellogigglesComExtractor={domain:'hellogiggles.com',title:{selectors:['.title']},author:{selectors:['.author-link']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varThoughtcatalogComExtractor={domain:'thoughtcatalog.com',title:{selectors:['h1.title',['meta[name="og:title"]','value']]},author:{selectors:['div.col-xs-12.article_header div.writer-container.writer-container-inline.writer-no-avatar h4.writer-name','h1.writer-name']},date_published:{selectors:[['meta[name="article:published_time"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.entry.post'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.tc_mark']}};varWwwNjComExtractor={domain:'www.nj.com',title:{selectors:[['meta[name="title"]','value']]},author:{selectors:[['meta[name="article_author"]','value']]},date_published:{selectors:[['meta[name="article_date_original"]','value']],timezone:'America/New_York'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['.entry-content'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varWwwInquisitrComExtractor={domain:'www.inquisitr.com',title:{selectors:['h1.entry-title.story--header--title']},author:{selectors:['div.story--header--author']},date_published:{selectors:[['meta[name="datePublished"]','value']]},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['article.story','.entry-content.'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:['.post-category','.story--header--socials','.story--header--content']}};varWwwNbcnewsComExtractor={domain:'www.nbcnews.com',title:{selectors:['div.article-hed h1']},author:{selectors:['span.byline_author']},date_published:{selectors:[['.flag_article-wrapper time.timestamp_article[datetime]','datetime'],'.flag_article-wrapper time'],timezone:'America/New_York'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:['div.article-body'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
// the result
clean:[]}};varFortuneComExtractor={domain:'fortune.com',title:{selectors:['h1']},author:{selectors:[['meta[name="author"]','value']]},date_published:{selectors:['.MblGHNMJ'],timezone:'UTC'},lead_image_url:{selectors:[['meta[name="og:image"]','value']]},content:{selectors:[['picture','article.row'],'article.row'],// Is there anything in the content you selected that needs transformed
// before it's consumable content? E.g., unusual lazy loaded images
transforms:{},// Is there anything that is in the result that shouldn't be?
// The clean selectors will remove anything that matches from
varTEXT_LINK_RE=newRegExp('http(s)?://','i');// An ordered list of meta tag names that denote likely article deks.
@ -2174,7 +2334,7 @@ var topPage=_Reflect$ownKeys$1(scoredLinks).reduce(function(acc,link){var scored
// so we fail.
if(topPage.score>=50){returntopPage.href;}returnnull;}};varCANONICAL_META_SELECTORS=['og:url'];functionparseDomain(url){varparsedUrl=URL$1.parse(url);varhostname=parsedUrl.hostname;returnhostname;}functionresult(url){return{url:url,domain:parseDomain(url)};}varGenericUrlExtractor={extract:functionextract(_ref){var$=_ref.$,url=_ref.url,metaCache=_ref.metaCache;var$canonical=$('link[rel=canonical]');if($canonical.length!==0){varhref=$canonical.attr('href');if(href){returnresult(href);}}varmetaUrl=extractFromMeta$$1($,CANONICAL_META_SELECTORS,metaCache);if(metaUrl){returnresult(metaUrl);}returnresult(url);}};varEXCERPT_META_SELECTORS=['og:description','twitter:description'];functionclean$2(content,$){varmaxLength=arguments.length>2&&arguments[2]!==undefined?arguments[2]:200;content=content.replace(/[\s\n]+/g,' ').trim();returnellipsize$1(content,maxLength,{ellipse:'…'});}varGenericExcerptExtractor={extract:functionextract(_ref){var$=_ref.$,content=_ref.content,metaCache=_ref.metaCache;varexcerpt=extractFromMeta$$1($,EXCERPT_META_SELECTORS,metaCache);if(excerpt){returnclean$2(stripTags$1(excerpt,$));}// Fall back to excerpting from the extracted content
varmaxLength=200;varshortContent=content.slice(0,maxLength*5);returnclean$2($(shortContent).text(),$,maxLength);}};varGenericWordCountExtractor={extract:functionextract(_ref){varcontent=_ref.content;var$=cheerio$1.load(content);var$content=$('div').first();vartext=normalizeSpaces$1($content.text());returntext.split(/\s/).length;}};varGenericExtractor={// This extractor is the default for all domains
domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:functiondirection(_ref){vartitle=_ref.title;returnstringDirection$1.getDirection(title);},extract:functionextract(options){varhtml=options.html,$=options.$;if(html&&!$){varloaded=cheerio$1.load(html);options.$=loaded;}vartitle=this.title(options);vardate_published=this.date_published(options);varauthor=this.author(options);varcontent=this.content(_extends$1({},options,{title:title}));varlead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));vardek=this.dek(_extends$1({},options,{content:content}));varnext_page_url=this.next_page_url(options);varexcerpt=this.excerpt(_extends$1({},options,{content:content}));varword_count=this.word_count(_extends$1({},options,{content:content}));vardirection=this.direction({title:title});var_url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};function getExtractor(url,parsedUrl){parsedUrl=parsedUrl||URL$1.parse(url);var_parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;varbaseDomain=hostname.split('.').slice(-2).join('.');returnExtractors[hostname]||Extractors[baseDomain]||GenericExtractor;}// Remove elements by an array of selectors
domain:'*',title:GenericTitleExtractor.extract,date_published:GenericDatePublishedExtractor.extract,author:GenericAuthorExtractor.extract,content:GenericContentExtractor.extract.bind(GenericContentExtractor),lead_image_url:GenericLeadImageUrlExtractor.extract,dek:GenericDekExtractor.extract,next_page_url:GenericNextPageUrlExtractor.extract,url_and_domain:GenericUrlExtractor.extract,excerpt:GenericExcerptExtractor.extract,word_count:GenericWordCountExtractor.extract,direction:functiondirection(_ref){vartitle=_ref.title;returnstringDirection$1.getDirection(title);},extract:functionextract(options){varhtml=options.html,$=options.$;if(html&&!$){varloaded=cheerio$1.load(html);options.$=loaded;}vartitle=this.title(options);vardate_published=this.date_published(options);varauthor=this.author(options);varcontent=this.content(_extends$1({},options,{title:title}));varlead_image_url=this.lead_image_url(_extends$1({},options,{content:content}));vardek=this.dek(_extends$1({},options,{content:content}));varnext_page_url=this.next_page_url(options);varexcerpt=this.excerpt(_extends$1({},options,{content:content}));varword_count=this.word_count(_extends$1({},options,{content:content}));vardirection=this.direction({title:title});var_url_and_domain=this.url_and_domain(options),url=_url_and_domain.url,domain=_url_and_domain.domain;return{title:title,author:author,date_published:date_published||null,dek:dek,lead_image_url:lead_image_url,content:content,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};var Detectors={'meta[name="al:ios:app_name"][value="Medium"]':MediumExtractor,'meta[name="generator"][value="blogger"]':BloggerExtractor};functiondetectByHtml($){varselector=_Reflect$ownKeys$1(Detectors).find(function(s){return$(s).length>0;});returnDetectors[selector];}function getExtractor(url,parsedUrl,$){parsedUrl=parsedUrl||URL$1.parse(url);var_parsedUrl=parsedUrl,hostname=_parsedUrl.hostname;varbaseDomain=hostname.split('.').slice(-2).join('.');returnExtractors[hostname]||Extractors[baseDomain]||detectByHtml($)||GenericExtractor;}// Remove elements by an array of selectors
functioncleanBySelectors($content,$,_ref){varclean=_ref.clean;if(!clean)return$content;$(clean.join(','),$content).remove();return$content;}// Transform matching elements
functiontransformElements($content,$,_ref2){vartransforms=_ref2.transforms;if(!transforms)return$content;_Reflect$ownKeys$1(transforms).forEach(function(key){var$matches=$(key,$content);varvalue=transforms[key];// If value is a string, convert directly
if(typeofvalue==='string'){$matches.each(function(index,node){convertNodeTo$$1($(node),$,transforms[key]);});}elseif(typeofvalue==='function'){// If value is function, apply function to node
if(extractor.domain==='*')returnextractor.extract(opts);opts=_extends$1({},opts,{extractor:extractor});if(contentOnly){var_content=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:extractedTitle}));return{content:_content};}vartitle=extractResult(_extends$1({},opts,{type:'title'}));vardate_published=extractResult(_extends$1({},opts,{type:'date_published'}));varauthor=extractResult(_extends$1({},opts,{type:'author'}));varnext_page_url=extractResult(_extends$1({},opts,{type:'next_page_url'}));varcontent=extractResult(_extends$1({},opts,{type:'content',extractHtml:true,title:title}));varlead_image_url=extractResult(_extends$1({},opts,{type:'lead_image_url',content:content}));varexcerpt=extractResult(_extends$1({},opts,{type:'excerpt',content:content}));vardek=extractResult(_extends$1({},opts,{type:'dek',content:content,excerpt:excerpt}));varword_count=extractResult(_extends$1({},opts,{type:'word_count',content:content}));vardirection=extractResult(_extends$1({},opts,{type:'direction',title:title}));var_ref3=extractResult(_extends$1({},opts,{type:'url_and_domain'}))||{url:null,domain:null},url=_ref3.url,domain=_ref3.domain;return{title:title,content:content,author:author,date_published:date_published,lead_image_url:lead_image_url,dek:dek,next_page_url:next_page_url,url:url,domain:domain,excerpt:excerpt,word_count:word_count,direction:direction};}};varcollectAllPages=function(){var_ref=_asyncToGenerator(_regeneratorRuntime.mark(function_callee(_ref2){varnext_page_url=_ref2.next_page_url,html=_ref2.html,$=_ref2.$,metaCache=_ref2.metaCache,result=_ref2.result,Extractor=_ref2.Extractor,title=_ref2.title,url=_ref2.url;varpages,previousUrls,extractorOpts,nextPageResult,word_count;return_regeneratorRuntime.wrap(function_callee$(_context){while(1){switch(_context.prev=_context.next){case0:// At this point, we've fetched just the first page
pages=1;previousUrls=[removeAnchor$1(url)];// If we've gone over 26 pages, something has
// likely gone wrong.
case2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;returnResource.create(next_page_url);case6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:result.content+'<hr><h4>Page '+pages+'</h4>'+nextPageResult.content});next_page_url=nextPageResult.next_page_url;_context.next=2;break;case15:word_count=GenericExtractor.word_count({content:'<div>'+result.content+'</div>'});return_context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case17:case'end':return_context.stop();}}},_callee,this);}));functioncollectAllPages(_x){return_ref.apply(this,arguments);}returncollectAllPages;}();varMercury={parse:functionparse(url,html){var_this=this;varopts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return_asyncToGenerator(_regeneratorRuntime.mark(function_callee(){var_opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,Extractor,$,metaCache,result,_result,title,next_page_url;return_regeneratorRuntime.wrap(function_callee$(_context){while(1){switch(_context.prev=_context.next){case0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;// if no url was passed and this is the browser version,
case2:if(!(next_page_url&&pages<26)){_context.next=15;break;}pages+=1;_context.next=6;returnResource.create(next_page_url);case6:$=_context.sent;html=$.html();extractorOpts={url:next_page_url,html:html,$:$,metaCache:metaCache,contentOnly:true,extractedTitle:title,previousUrls:previousUrls};nextPageResult=RootExtractor.extract(Extractor,extractorOpts);previousUrls.push(next_page_url);result=_extends$1({},result,{content:result.content+'<hr><h4>Page '+pages+'</h4>'+nextPageResult.content});next_page_url=nextPageResult.next_page_url;_context.next=2;break;case15:word_count=GenericExtractor.word_count({content:'<div>'+result.content+'</div>'});return_context.abrupt('return',_extends$1({},result,{total_pages:pages,pages_rendered:pages,word_count:word_count}));case17:case'end':return_context.stop();}}},_callee,this);}));functioncollectAllPages(_x){return_ref.apply(this,arguments);}returncollectAllPages;}();varMercury={parse:functionparse(url,html){var_this=this;varopts=arguments.length>2&&arguments[2]!==undefined?arguments[2]:{};return_asyncToGenerator(_regeneratorRuntime.mark(function_callee(){var_opts$fetchAllPages,fetchAllPages,_opts$fallback,fallback,parsedUrl,$,Extractor,metaCache,result,_result,title,next_page_url;return_regeneratorRuntime.wrap(function_callee$(_context){while(1){switch(_context.prev=_context.next){case0:_opts$fetchAllPages=opts.fetchAllPages,fetchAllPages=_opts$fetchAllPages===undefined?true:_opts$fetchAllPages,_opts$fallback=opts.fallback,fallback=_opts$fallback===undefined?true:_opts$fallback;// if no url was passed and this is the browser version,
// set url to window.location.href and load the html
html=html||cheerio$1.html();}parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=5;break;}return_context.abrupt('return',Errors.badUrl);case5:Extractor=getExtractor(url,parsedUrl);// console.log(`Using extractor for ${Extractor.domain}`);
_context.next=8;returnResource.create(url,html,parsedUrl);case8:$=_context.sent;if(!$.failed){_context.next=11;break;}return_context.abrupt('return',$);case11:// if html still has not been set (i.e., url passed to Mercury.parse),
html=html||cheerio$1.html();}parsedUrl=URL$1.parse(url);if(validateUrl(parsedUrl)){_context.next=5;break;}return_context.abrupt('return',Errors.badUrl);case5:_context.next=7;returnResource.create(url,html,parsedUrl);case7:$=_context.sent;Extractor=getExtractor(url,parsedUrl,$);// console.log(`Using extractor for ${Extractor.domain}`);
// If we found an error creating the resource, return that error
if(!$.failed){_context.next=11;break;}return_context.abrupt('return',$);case11:// if html still has not been set (i.e., url passed to Mercury.parse),
// set html from the response of Resource.create
if(!html){html=$.html();}// Cached value of every meta name in our document.
// Used when extracting title/author/date_published/dek
@ -2284,7 +2445,7 @@ var _templateObject2 = _taggedTemplateLiteral(['\n import assert from \'asser