@ -105,9 +105,9 @@ Readability.prototype = {
* @ return void
* @ return void
* /
* /
_fixRelativeUris : function ( articleContent ) {
_fixRelativeUris : function ( articleContent ) {
let scheme = this . _uri . scheme ;
var scheme = this . _uri . scheme ;
let prePath = this . _uri . prePath ;
var prePath = this . _uri . prePath ;
let pathBase = this . _uri . pathBase ;
var pathBase = this . _uri . pathBase ;
function toAbsoluteURI ( uri ) {
function toAbsoluteURI ( uri ) {
// If this is already an absolute URI, return it.
// If this is already an absolute URI, return it.
@ -128,10 +128,10 @@ Readability.prototype = {
}
}
function convertRelativeURIs ( tagName , propName ) {
function convertRelativeURIs ( tagName , propName ) {
let elems = articleContent . getElementsByTagName ( tagName ) ;
var elems = articleContent . getElementsByTagName ( tagName ) ;
for ( let i = elems . length ; -- i >= 0 ; ) {
for ( var i = elems . length ; -- i >= 0 ; ) {
let elem = elems [ i ] ;
var elem = elems [ i ] ;
let relativeURI = elem . getAttribute ( propName ) ;
var relativeURI = elem . getAttribute ( propName ) ;
if ( relativeURI != null )
if ( relativeURI != null )
elems [ i ] . setAttribute ( propName , toAbsoluteURI ( relativeURI ) ) ;
elems [ i ] . setAttribute ( propName , toAbsoluteURI ( relativeURI ) ) ;
}
}
@ -150,9 +150,9 @@ Readability.prototype = {
* @ return void
* @ return void
* * /
* * /
_getArticleTitle : function ( ) {
_getArticleTitle : function ( ) {
let doc = this . _doc ;
var doc = this . _doc ;
let curTitle = "" ;
var curTitle = "" ;
let origTitle = "" ;
var origTitle = "" ;
try {
try {
curTitle = origTitle = doc . title ;
curTitle = origTitle = doc . title ;
@ -173,7 +173,7 @@ Readability.prototype = {
if ( curTitle . split ( ' ' ) . length < 3 )
if ( curTitle . split ( ' ' ) . length < 3 )
curTitle = origTitle . replace ( /[^:]*[:](.*)/gi , '$1' ) ;
curTitle = origTitle . replace ( /[^:]*[:](.*)/gi , '$1' ) ;
} else if ( curTitle . length > 150 || curTitle . length < 15 ) {
} else if ( curTitle . length > 150 || curTitle . length < 15 ) {
let hOnes = doc . getElementsByTagName ( 'h1' ) ;
var hOnes = doc . getElementsByTagName ( 'h1' ) ;
if ( hOnes . length === 1 )
if ( hOnes . length === 1 )
curTitle = this . _getInnerText ( hOnes [ 0 ] ) ;
curTitle = this . _getInnerText ( hOnes [ 0 ] ) ;
@ -194,13 +194,13 @@ Readability.prototype = {
* @ return void
* @ return void
* * /
* * /
_prepDocument : function ( ) {
_prepDocument : function ( ) {
let doc = this . _doc ;
var doc = this . _doc ;
// In some cases a body element can't be found (if the HTML is
// In some cases a body element can't be found (if the HTML is
// totally hosed for example) so we create a new body node and
// totally hosed for example) so we create a new body node and
// append it to the document.
// append it to the document.
if ( ! doc . body ) {
if ( ! doc . body ) {
let body = doc . createElement ( "body" ) ;
var body = doc . createElement ( "body" ) ;
try {
try {
doc . body = body ;
doc . body = body ;
@ -211,15 +211,15 @@ Readability.prototype = {
}
}
// Remove all style tags in head
// Remove all style tags in head
let styleTags = doc . getElementsByTagName ( "style" ) ;
var styleTags = doc . getElementsByTagName ( "style" ) ;
for ( let st = 0 ; st < styleTags . length ; st += 1 ) {
for ( var st = 0 ; st < styleTags . length ; st += 1 ) {
styleTags [ st ] . textContent = "" ;
styleTags [ st ] . textContent = "" ;
}
}
this . _replaceBrs ( doc . body ) ;
this . _replaceBrs ( doc . body ) ;
let fonts = doc . getElementsByTagName ( "FONT" ) ;
var fonts = doc . getElementsByTagName ( "FONT" ) ;
for ( let i = fonts . length ; -- i >= 0 ; ) {
for ( var i = fonts . length ; -- i >= 0 ; ) {
this . _setNodeTag ( fonts [ i ] , "SPAN" ) ;
this . _setNodeTag ( fonts [ i ] , "SPAN" ) ;
}
}
} ,
} ,
@ -230,7 +230,7 @@ Readability.prototype = {
* returned .
* returned .
* /
* /
_nextElement : function ( node ) {
_nextElement : function ( node ) {
let next = node ;
var next = node ;
while ( next
while ( next
&& ( next . nodeType != Node . ELEMENT _NODE )
&& ( next . nodeType != Node . ELEMENT _NODE )
&& this . REGEXPS . whitespace . test ( next . textContent ) ) {
&& this . REGEXPS . whitespace . test ( next . textContent ) ) {
@ -247,21 +247,21 @@ Readability.prototype = {
* <div>foo<br>bar<p>abc</p></div>
* <div>foo<br>bar<p>abc</p></div>
* /
* /
_replaceBrs : function ( elem ) {
_replaceBrs : function ( elem ) {
let brs = elem . getElementsByTagName ( "br" ) ;
var brs = elem . getElementsByTagName ( "br" ) ;
for ( let i = 0 ; i < brs . length ; i ++ ) {
for ( var i = 0 ; i < brs . length ; i ++ ) {
let br = brs [ i ] ;
var br = brs [ i ] ;
let next = br . nextSibling ;
var next = br . nextSibling ;
// Whether 2 or more <br> elements have been found and replaced with a
// Whether 2 or more <br> elements have been found and replaced with a
// <p> block.
// <p> block.
let replaced = false ;
var replaced = false ;
// If we find a <br> chain, remove the <br>s until we hit another element
// If we find a <br> chain, remove the <br>s until we hit another element
// or non-whitespace. This leaves behind the first <br> in the chain
// or non-whitespace. This leaves behind the first <br> in the chain
// (which will be replaced with a <p> later).
// (which will be replaced with a <p> later).
while ( ( next = this . _nextElement ( next ) ) && ( next . tagName == "BR" ) ) {
while ( ( next = this . _nextElement ( next ) ) && ( next . tagName == "BR" ) ) {
replaced = true ;
replaced = true ;
let sibling = next . nextSibling ;
var sibling = next . nextSibling ;
next . parentNode . removeChild ( next ) ;
next . parentNode . removeChild ( next ) ;
next = sibling ;
next = sibling ;
}
}
@ -270,20 +270,20 @@ Readability.prototype = {
// all sibling nodes as children of the <p> until we hit another <br>
// all sibling nodes as children of the <p> until we hit another <br>
// chain.
// chain.
if ( replaced ) {
if ( replaced ) {
let p = this . _doc . createElement ( "p" ) ;
var p = this . _doc . createElement ( "p" ) ;
br . parentNode . replaceChild ( p , br ) ;
br . parentNode . replaceChild ( p , br ) ;
next = p . nextSibling ;
next = p . nextSibling ;
while ( next ) {
while ( next ) {
// If we've hit another <br><br>, we're done adding children to this <p>.
// If we've hit another <br><br>, we're done adding children to this <p>.
if ( next . tagName == "BR" ) {
if ( next . tagName == "BR" ) {
let nextElem = this . _nextElement ( next ) ;
var nextElem = this . _nextElement ( next ) ;
if ( nextElem && nextElem . tagName == "BR" )
if ( nextElem && nextElem . tagName == "BR" )
break ;
break ;
}
}
// Otherwise, make this node a child of the new <p>.
// Otherwise, make this node a child of the new <p>.
let sibling = next . nextSibling ;
var sibling = next . nextSibling ;
p . appendChild ( next ) ;
p . appendChild ( next ) ;
next = sibling ;
next = sibling ;
}
}
@ -326,11 +326,11 @@ Readability.prototype = {
this . _cleanConditionally ( articleContent , "div" ) ;
this . _cleanConditionally ( articleContent , "div" ) ;
// Remove extra paragraphs
// Remove extra paragraphs
let articleParagraphs = articleContent . getElementsByTagName ( 'p' ) ;
var articleParagraphs = articleContent . getElementsByTagName ( 'p' ) ;
for ( let i = articleParagraphs . length - 1 ; i >= 0 ; i -= 1 ) {
for ( var i = articleParagraphs . length - 1 ; i >= 0 ; i -= 1 ) {
let imgCount = articleParagraphs [ i ] . getElementsByTagName ( 'img' ) . length ;
var imgCount = articleParagraphs [ i ] . getElementsByTagName ( 'img' ) . length ;
let embedCount = articleParagraphs [ i ] . getElementsByTagName ( 'embed' ) . length ;
var embedCount = articleParagraphs [ i ] . getElementsByTagName ( 'embed' ) . length ;
let objectCount = articleParagraphs [ i ] . getElementsByTagName ( 'object' ) . length ;
var objectCount = articleParagraphs [ i ] . getElementsByTagName ( 'object' ) . length ;
if ( imgCount === 0 &&
if ( imgCount === 0 &&
embedCount === 0 &&
embedCount === 0 &&
@ -339,10 +339,10 @@ Readability.prototype = {
articleParagraphs [ i ] . parentNode . removeChild ( articleParagraphs [ i ] ) ;
articleParagraphs [ i ] . parentNode . removeChild ( articleParagraphs [ i ] ) ;
}
}
let brs = articleContent . getElementsByTagName ( "BR" ) ;
var brs = articleContent . getElementsByTagName ( "BR" ) ;
for ( let i = brs . length ; -- i >= 0 ; ) {
for ( var i = brs . length ; -- i >= 0 ; ) {
let br = brs [ i ] ;
var br = brs [ i ] ;
let next = this . _nextElement ( br . nextSibling ) ;
var next = this . _nextElement ( br . nextSibling ) ;
if ( next && next . tagName == "P" )
if ( next && next . tagName == "P" )
br . parentNode . removeChild ( br ) ;
br . parentNode . removeChild ( br ) ;
}
}
@ -402,17 +402,17 @@ Readability.prototype = {
* @ return Element
* @ return Element
* * /
* * /
_grabArticle : function ( page ) {
_grabArticle : function ( page ) {
let doc = this . _doc ;
var doc = this . _doc ;
let isPaging = ( page !== null ? true : false ) ;
var isPaging = ( page !== null ? true : false ) ;
page = page ? page : this . _doc . body ;
page = page ? page : this . _doc . body ;
let pageCacheHtml = page . innerHTML ;
var pageCacheHtml = page . innerHTML ;
// Check if any "dir" is set on the toplevel document element
// Check if any "dir" is set on the toplevel document element
this . _articleDir = doc . documentElement . getAttribute ( "dir" ) ;
this . _articleDir = doc . documentElement . getAttribute ( "dir" ) ;
while ( true ) {
while ( true ) {
let stripUnlikelyCandidates = this . _flagIsActive ( this . FLAG _STRIP _UNLIKELYS ) ;
var stripUnlikelyCandidates = this . _flagIsActive ( this . FLAG _STRIP _UNLIKELYS ) ;
let allElements = page . getElementsByTagName ( '*' ) ;
var allElements = page . getElementsByTagName ( '*' ) ;
// First, node prepping. Trash nodes that look cruddy (like ones with the
// First, node prepping. Trash nodes that look cruddy (like ones with the
// class name "comment", etc), and turn divs into P tags where they have been
// class name "comment", etc), and turn divs into P tags where they have been
@ -420,11 +420,11 @@ Readability.prototype = {
//
//
// Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
// Note: Assignment from index for performance. See http://www.peachpit.com/articles/article.aspx?p=31567&seqNum=5
// TODO: Shouldn't this be a reverse traversal?
// TODO: Shouldn't this be a reverse traversal?
let node = null ;
var node = null ;
let nodesToScore = [ ] ;
var nodesToScore = [ ] ;
// Let each node know its index in the allElements array.
// Let each node know its index in the allElements array.
for ( let i = allElements . length ; -- i >= 0 ; ) {
for ( var i = allElements . length ; -- i >= 0 ; ) {
allElements [ i ] . _index = i ;
allElements [ i ] . _index = i ;
}
}
@ -434,18 +434,18 @@ Readability.prototype = {
* of its children - from the allElements array .
* of its children - from the allElements array .
* /
* /
function purgeNode ( node ) {
function purgeNode ( node ) {
for ( let i = node . childNodes . length ; -- i >= 0 ; ) {
for ( var i = node . childNodes . length ; -- i >= 0 ; ) {
purgeNode ( node . childNodes [ i ] ) ;
purgeNode ( node . childNodes [ i ] ) ;
}
}
if ( node . _index !== undefined && allElements [ node . _index ] == node )
if ( node . _index !== undefined && allElements [ node . _index ] == node )
delete allElements [ node . _index ] ;
delete allElements [ node . _index ] ;
}
}
for ( let nodeIndex = 0 ; nodeIndex < allElements . length ; nodeIndex ++ ) {
for ( var nodeIndex = 0 ; nodeIndex < allElements . length ; nodeIndex ++ ) {
if ( ! ( node = allElements [ nodeIndex ] ) )
if ( ! ( node = allElements [ nodeIndex ] ) )
continue ;
continue ;
let matchString = node . className + node . id ;
var matchString = node . className + node . id ;
if ( matchString . search ( this . REGEXPS . byline ) !== - 1 && ! this . _articleByline ) {
if ( matchString . search ( this . REGEXPS . byline ) !== - 1 && ! this . _articleByline ) {
if ( this . _isValidByline ( node . textContent ) ) {
if ( this . _isValidByline ( node . textContent ) ) {
this . _articleByline = node . textContent . trim ( ) ;
this . _articleByline = node . textContent . trim ( ) ;
@ -476,11 +476,11 @@ Readability.prototype = {
// element. DIVs with only a P element inside and no text content can be
// element. DIVs with only a P element inside and no text content can be
// safely converted into plain P elements to avoid confusing the scoring
// safely converted into plain P elements to avoid confusing the scoring
// algorithm with DIVs which are, in practice, paragraphs.
// algorithm with DIVs which are, in practice, paragraphs.
let pIndex = this . _getSinglePIndexInsideDiv ( node ) ;
var pIndex = this . _getSinglePIndexInsideDiv ( node ) ;
if ( pIndex >= 0 || ! this . _hasChildBlockElement ( node ) ) {
if ( pIndex >= 0 || ! this . _hasChildBlockElement ( node ) ) {
if ( pIndex >= 0 ) {
if ( pIndex >= 0 ) {
let newNode = node . childNodes [ pIndex ] ;
var newNode = node . childNodes [ pIndex ] ;
node . parentNode . replaceChild ( newNode , node ) ;
node . parentNode . replaceChild ( newNode , node ) ;
purgeNode ( node ) ;
purgeNode ( node ) ;
} else {
} else {
@ -489,13 +489,13 @@ Readability.prototype = {
}
}
} else {
} else {
// EXPERIMENTAL
// EXPERIMENTAL
for ( let i = 0 , il = node . childNodes . length ; i < il ; i += 1 ) {
for ( var i = 0 , il = node . childNodes . length ; i < il ; i += 1 ) {
let childNode = node . childNodes [ i ] ;
var childNode = node . childNodes [ i ] ;
if ( ! childNode )
if ( ! childNode )
continue ;
continue ;
if ( childNode . nodeType === 3 ) { // Node.TEXT_NODE
if ( childNode . nodeType === 3 ) { // Node.TEXT_NODE
let p = doc . createElement ( 'p' ) ;
var p = doc . createElement ( 'p' ) ;
p . textContent = childNode . textContent ;
p . textContent = childNode . textContent ;
p . style . display = 'inline' ;
p . style . display = 'inline' ;
p . className = 'readability-styled' ;
p . className = 'readability-styled' ;
@ -512,11 +512,11 @@ Readability.prototype = {
*
*
* A score is determined by things like number of commas , class names , etc . Maybe eventually link density .
* A score is determined by things like number of commas , class names , etc . Maybe eventually link density .
* * /
* * /
let candidates = [ ] ;
var candidates = [ ] ;
for ( let pt = 0 ; pt < nodesToScore . length ; pt += 1 ) {
for ( var pt = 0 ; pt < nodesToScore . length ; pt += 1 ) {
let parentNode = nodesToScore [ pt ] . parentNode ;
var parentNode = nodesToScore [ pt ] . parentNode ;
let grandParentNode = parentNode ? parentNode . parentNode : null ;
var grandParentNode = parentNode ? parentNode . parentNode : null ;
let innerText = this . _getInnerText ( nodesToScore [ pt ] ) ;
var innerText = this . _getInnerText ( nodesToScore [ pt ] ) ;
if ( ! parentNode || typeof ( parentNode . tagName ) === 'undefined' )
if ( ! parentNode || typeof ( parentNode . tagName ) === 'undefined' )
continue ;
continue ;
@ -539,7 +539,7 @@ Readability.prototype = {
candidates . push ( grandParentNode ) ;
candidates . push ( grandParentNode ) ;
}
}
let contentScore = 0 ;
var contentScore = 0 ;
// Add a point for the paragraph itself as a base.
// Add a point for the paragraph itself as a base.
contentScore += 1 ;
contentScore += 1 ;
@ -559,21 +559,21 @@ Readability.prototype = {
// After we've calculated scores, loop through all of the possible
// After we've calculated scores, loop through all of the possible
// candidate nodes we found and find the one with the highest score.
// candidate nodes we found and find the one with the highest score.
let topCandidates = [ ] ;
var topCandidates = [ ] ;
for ( let c = 0 , cl = candidates . length ; c < cl ; c += 1 ) {
for ( var c = 0 , cl = candidates . length ; c < cl ; c += 1 ) {
let candidate = candidates [ c ] ;
var candidate = candidates [ c ] ;
// Scale the final candidates score based on link density. Good content
// Scale the final candidates score based on link density. Good content
// should have a relatively small link density (5% or less) and be mostly
// should have a relatively small link density (5% or less) and be mostly
// unaffected by this operation.
// unaffected by this operation.
let candidateScore = candidate . readability . contentScore * ( 1 - this . _getLinkDensity ( candidate ) ) ;
var candidateScore = candidate . readability . contentScore * ( 1 - this . _getLinkDensity ( candidate ) ) ;
candidate . readability . contentScore = candidateScore ;
candidate . readability . contentScore = candidateScore ;
this . log ( 'Candidate: ' + candidate + " (" + candidate . className + ":" +
this . log ( 'Candidate: ' + candidate + " (" + candidate . className + ":" +
candidate . id + ") with score " + candidateScore ) ;
candidate . id + ") with score " + candidateScore ) ;
for ( let t = 0 ; t < this . N _TOP _CANDIDATES ; t ++ ) {
for ( var t = 0 ; t < this . N _TOP _CANDIDATES ; t ++ ) {
let aTopCandidate = topCandidates [ t ] ;
var aTopCandidate = topCandidates [ t ] ;
if ( ! aTopCandidate || candidateScore > aTopCandidate . readability . contentScore ) {
if ( ! aTopCandidate || candidateScore > aTopCandidate . readability . contentScore ) {
topCandidates . splice ( t , 0 , candidate ) ;
topCandidates . splice ( t , 0 , candidate ) ;
@ -584,15 +584,15 @@ Readability.prototype = {
}
}
}
}
let topCandidate = topCandidates [ 0 ] || null ;
var topCandidate = topCandidates [ 0 ] || null ;
// If we still have no top candidate, just use the body as a last resort.
// If we still have no top candidate, just use the body as a last resort.
// We also have to copy the body node so it is something we can modify.
// We also have to copy the body node so it is something we can modify.
if ( topCandidate === null || topCandidate . tagName === "BODY" ) {
if ( topCandidate === null || topCandidate . tagName === "BODY" ) {
// Move all of the page's children into topCandidate
// Move all of the page's children into topCandidate
topCandidate = doc . createElement ( "DIV" ) ;
topCandidate = doc . createElement ( "DIV" ) ;
let children = page . childNodes ;
var children = page . childNodes ;
for ( let i = 0 ; i < children . length ; ++ i ) {
for ( var i = 0 ; i < children . length ; ++ i ) {
topCandidate . appendChild ( children [ i ] ) ;
topCandidate . appendChild ( children [ i ] ) ;
}
}
@ -604,16 +604,16 @@ Readability.prototype = {
// Now that we have the top candidate, look through its siblings for content
// Now that we have the top candidate, look through its siblings for content
// that might also be related. Things like preambles, content split by ads
// that might also be related. Things like preambles, content split by ads
// that we removed, etc.
// that we removed, etc.
let articleContent = doc . createElement ( "DIV" ) ;
var articleContent = doc . createElement ( "DIV" ) ;
if ( isPaging )
if ( isPaging )
articleContent . id = "readability-content" ;
articleContent . id = "readability-content" ;
let siblingScoreThreshold = Math . max ( 10 , topCandidate . readability . contentScore * 0.2 ) ;
var siblingScoreThreshold = Math . max ( 10 , topCandidate . readability . contentScore * 0.2 ) ;
let siblingNodes = topCandidate . parentNode . childNodes ;
var siblingNodes = topCandidate . parentNode . childNodes ;
for ( let s = 0 , sl = siblingNodes . length ; s < sl ; s += 1 ) {
for ( var s = 0 , sl = siblingNodes . length ; s < sl ; s += 1 ) {
let siblingNode = siblingNodes [ s ] ;
var siblingNode = siblingNodes [ s ] ;
let append = false ;
var append = false ;
this . log ( "Looking at sibling node: " + siblingNode + " (" + siblingNode . className + ":" + siblingNode . id + ")" + ( ( typeof siblingNode . readability !== 'undefined' ) ? ( " with score " + siblingNode . readability . contentScore ) : '' ) ) ;
this . log ( "Looking at sibling node: " + siblingNode + " (" + siblingNode . className + ":" + siblingNode . id + ")" + ( ( typeof siblingNode . readability !== 'undefined' ) ? ( " with score " + siblingNode . readability . contentScore ) : '' ) ) ;
this . log ( "Sibling has score " + ( siblingNode . readability ? siblingNode . readability . contentScore : 'Unknown' ) ) ;
this . log ( "Sibling has score " + ( siblingNode . readability ? siblingNode . readability . contentScore : 'Unknown' ) ) ;
@ -621,7 +621,7 @@ Readability.prototype = {
if ( siblingNode === topCandidate )
if ( siblingNode === topCandidate )
append = true ;
append = true ;
let contentBonus = 0 ;
var contentBonus = 0 ;
// Give a bonus if sibling nodes and top candidates have the exact same class name
// Give a bonus if sibling nodes and top candidates have the exact same class name
if ( siblingNode . className === topCandidate . className && topCandidate . className !== "" )
if ( siblingNode . className === topCandidate . className && topCandidate . className !== "" )
@ -632,9 +632,9 @@ Readability.prototype = {
append = true ;
append = true ;
if ( siblingNode . nodeName === "P" ) {
if ( siblingNode . nodeName === "P" ) {
let linkDensity = this . _getLinkDensity ( siblingNode ) ;
var linkDensity = this . _getLinkDensity ( siblingNode ) ;
let nodeContent = this . _getInnerText ( siblingNode ) ;
var nodeContent = this . _getInnerText ( siblingNode ) ;
let nodeLength = nodeContent . length ;
var nodeLength = nodeContent . length ;
if ( nodeLength > 80 && linkDensity < 0.25 ) {
if ( nodeLength > 80 && linkDensity < 0.25 ) {
append = true ;
append = true ;
@ -675,11 +675,11 @@ Readability.prototype = {
this . _prepArticle ( articleContent ) ;
this . _prepArticle ( articleContent ) ;
if ( this . _curPageNum === 1 ) {
if ( this . _curPageNum === 1 ) {
let div = doc . createElement ( "DIV" ) ;
var div = doc . createElement ( "DIV" ) ;
div . id = "readability-page-1" ;
div . id = "readability-page-1" ;
div . className = "page" ;
div . className = "page" ;
let children = articleContent . childNodes ;
var children = articleContent . childNodes ;
for ( let i = 0 ; i < children . length ; ++ i ) {
for ( var i = 0 ; i < children . length ; ++ i ) {
div . appendChild ( children [ i ] ) ;
div . appendChild ( children [ i ] ) ;
}
}
articleContent . appendChild ( div ) ;
articleContent . appendChild ( div ) ;
@ -738,23 +738,23 @@ Readability.prototype = {
* @ return String - excerpt of the article
* @ return String - excerpt of the article
* * /
* * /
_getExcerpt : function ( articleContent ) {
_getExcerpt : function ( articleContent ) {
let values = { } ;
var values = { } ;
let metaElements = this . _doc . getElementsByTagName ( "meta" ) ;
var metaElements = this . _doc . getElementsByTagName ( "meta" ) ;
// Match "description", or Twitter's "twitter:description" (Cards)
// Match "description", or Twitter's "twitter:description" (Cards)
// in name attribute.
// in name attribute.
let namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi ;
var namePattern = /^\s*((twitter)\s*:\s*)?description\s*$/gi ;
// Match Facebook's og:description (Open Graph) in property attribute.
// Match Facebook's og:description (Open Graph) in property attribute.
let propertyPattern = /^\s*og\s*:\s*description\s*$/gi ;
var propertyPattern = /^\s*og\s*:\s*description\s*$/gi ;
// Find description tags.
// Find description tags.
for ( let i = 0 ; i < metaElements . length ; i ++ ) {
for ( var i = 0 ; i < metaElements . length ; i ++ ) {
let element = metaElements [ i ] ;
var element = metaElements [ i ] ;
let elementName = element . getAttribute ( "name" ) ;
var elementName = element . getAttribute ( "name" ) ;
let elementProperty = element . getAttribute ( "property" ) ;
var elementProperty = element . getAttribute ( "property" ) ;
let name ;
var name ;
if ( namePattern . test ( elementName ) ) {
if ( namePattern . test ( elementName ) ) {
name = elementName ;
name = elementName ;
} else if ( propertyPattern . test ( elementProperty ) ) {
} else if ( propertyPattern . test ( elementProperty ) ) {
@ -762,7 +762,7 @@ Readability.prototype = {
}
}
if ( name ) {
if ( name ) {
let content = element . getAttribute ( "content" ) ;
var content = element . getAttribute ( "content" ) ;
if ( content ) {
if ( content ) {
// Convert to lowercase and remove any whitespace
// Convert to lowercase and remove any whitespace
// so we can match below.
// so we can match below.
@ -787,7 +787,7 @@ Readability.prototype = {
}
}
// No description meta tags, use the article's first paragraph.
// No description meta tags, use the article's first paragraph.
let paragraphs = articleContent . getElementsByTagName ( "p" ) ;
var paragraphs = articleContent . getElementsByTagName ( "p" ) ;
if ( paragraphs . length > 0 ) {
if ( paragraphs . length > 0 ) {
return paragraphs [ 0 ] . textContent ;
return paragraphs [ 0 ] . textContent ;
}
}
@ -801,8 +801,8 @@ Readability.prototype = {
* @ param Element
* @ param Element
* * /
* * /
_removeScripts : function ( doc ) {
_removeScripts : function ( doc ) {
let scripts = doc . getElementsByTagName ( 'script' ) ;
var scripts = doc . getElementsByTagName ( 'script' ) ;
for ( let i = scripts . length - 1 ; i >= 0 ; i -= 1 ) {
for ( var i = scripts . length - 1 ; i >= 0 ; i -= 1 ) {
scripts [ i ] . nodeValue = "" ;
scripts [ i ] . nodeValue = "" ;
scripts [ i ] . removeAttribute ( 'src' ) ;
scripts [ i ] . removeAttribute ( 'src' ) ;
@ -819,11 +819,11 @@ Readability.prototype = {
* @ param Element
* @ param Element
* * /
* * /
_getSinglePIndexInsideDiv : function ( e ) {
_getSinglePIndexInsideDiv : function ( e ) {
let childNodes = e . childNodes ;
var childNodes = e . childNodes ;
let pIndex = - 1 ;
var pIndex = - 1 ;
for ( let i = childNodes . length ; -- i >= 0 ; ) {
for ( var i = childNodes . length ; -- i >= 0 ; ) {
let node = childNodes [ i ] ;
var node = childNodes [ i ] ;
if ( node . nodeType === Node . ELEMENT _NODE ) {
if ( node . nodeType === Node . ELEMENT _NODE ) {
if ( node . tagName !== "P" )
if ( node . tagName !== "P" )
@ -847,9 +847,9 @@ Readability.prototype = {
* @ param Element
* @ param Element
* /
* /
_hasChildBlockElement : function ( e ) {
_hasChildBlockElement : function ( e ) {
let length = e . childNodes . length ;
var length = e . childNodes . length ;
for ( let i = 0 ; i < length ; i ++ ) {
for ( var i = 0 ; i < length ; i ++ ) {
let child = e . childNodes [ i ] ;
var child = e . childNodes [ i ] ;
if ( child . nodeType != 1 )
if ( child . nodeType != 1 )
continue ;
continue ;
@ -867,7 +867,7 @@ Readability.prototype = {
* @ return string
* @ return string
* * /
* * /
_getInnerText : function ( e , normalizeSpaces ) {
_getInnerText : function ( e , normalizeSpaces ) {
let textContent = e . textContent . replace ( this . REGEXPS . trim , "" ) ;
var textContent = e . textContent . replace ( this . REGEXPS . trim , "" ) ;
normalizeSpaces = ( typeof normalizeSpaces === 'undefined' ) ? true : normalizeSpaces ;
normalizeSpaces = ( typeof normalizeSpaces === 'undefined' ) ? true : normalizeSpaces ;
if ( normalizeSpaces ) {
if ( normalizeSpaces ) {
@ -898,7 +898,7 @@ Readability.prototype = {
* * /
* * /
_cleanStyles : function ( e ) {
_cleanStyles : function ( e ) {
e = e || this . _doc ;
e = e || this . _doc ;
let cur = e . firstChild ;
var cur = e . firstChild ;
if ( ! e )
if ( ! e )
return ;
return ;
@ -929,11 +929,11 @@ Readability.prototype = {
* @ return number ( float )
* @ return number ( float )
* * /
* * /
_getLinkDensity : function ( e ) {
_getLinkDensity : function ( e ) {
let links = e . getElementsByTagName ( "a" ) ;
var links = e . getElementsByTagName ( "a" ) ;
let textLength = this . _getInnerText ( e ) . length ;
var textLength = this . _getInnerText ( e ) . length ;
let linkLength = 0 ;
var linkLength = 0 ;
for ( let i = 0 , il = links . length ; i < il ; i += 1 ) {
for ( var i = 0 , il = links . length ; i < il ; i += 1 ) {
linkLength += this . _getInnerText ( links [ i ] ) . length ;
linkLength += this . _getInnerText ( links [ i ] ) . length ;
}
}
@ -947,14 +947,14 @@ Readability.prototype = {
* @ return string the base url
* @ return string the base url
* * /
* * /
_findBaseUrl : function ( ) {
_findBaseUrl : function ( ) {
let uri = this . _uri ;
var uri = this . _uri ;
let noUrlParams = uri . path . split ( "?" ) [ 0 ] ;
var noUrlParams = uri . path . split ( "?" ) [ 0 ] ;
let urlSlashes = noUrlParams . split ( "/" ) . reverse ( ) ;
var urlSlashes = noUrlParams . split ( "/" ) . reverse ( ) ;
let cleanedSegments = [ ] ;
var cleanedSegments = [ ] ;
let possibleType = "" ;
var possibleType = "" ;
for ( let i = 0 , slashLen = urlSlashes . length ; i < slashLen ; i += 1 ) {
for ( var i = 0 , slashLen = urlSlashes . length ; i < slashLen ; i += 1 ) {
let segment = urlSlashes [ i ] ;
var segment = urlSlashes [ i ] ;
// Split off and save anything that looks like a file type.
// Split off and save anything that looks like a file type.
if ( segment . indexOf ( "." ) !== - 1 ) {
if ( segment . indexOf ( "." ) !== - 1 ) {
@ -974,7 +974,7 @@ Readability.prototype = {
if ( segment . match ( /((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i ) && ( ( i === 1 ) || ( i === 0 ) ) )
if ( segment . match ( /((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i ) && ( ( i === 1 ) || ( i === 0 ) ) )
segment = segment . replace ( /((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i , "" ) ;
segment = segment . replace ( /((_|-)?p[a-z]*|(_|-))[0-9]{1,2}$/i , "" ) ;
let del = false ;
var del = false ;
// If this is purely a number, and it's the first or second segment,
// If this is purely a number, and it's the first or second segment,
// it's probably a page number. Remove it.
// it's probably a page number. Remove it.
@ -1006,10 +1006,10 @@ Readability.prototype = {
* @ return object ( array )
* @ return object ( array )
* * /
* * /
_findNextPageLink : function ( elem ) {
_findNextPageLink : function ( elem ) {
let uri = this . _uri ;
var uri = this . _uri ;
let possiblePages = { } ;
var possiblePages = { } ;
let allLinks = elem . getElementsByTagName ( 'a' ) ;
var allLinks = elem . getElementsByTagName ( 'a' ) ;
let articleBaseUrl = this . _findBaseUrl ( ) ;
var articleBaseUrl = this . _findBaseUrl ( ) ;
// Loop through all links, looking for hints that they may be next-page links.
// Loop through all links, looking for hints that they may be next-page links.
// Things like having "page" in their textContent, className or id, or being a child
// Things like having "page" in their textContent, className or id, or being a child
@ -1018,9 +1018,9 @@ Readability.prototype = {
// Also possible: levenshtein distance? longest common subsequence?
// Also possible: levenshtein distance? longest common subsequence?
//
//
// After we do that, assign each page a score, and
// After we do that, assign each page a score, and
for ( let i = 0 , il = allLinks . length ; i < il ; i += 1 ) {
for ( var i = 0 , il = allLinks . length ; i < il ; i += 1 ) {
let link = allLinks [ i ] ;
var link = allLinks [ i ] ;
let linkHref = allLinks [ i ] . href . replace ( /#.*$/ , '' ) . replace ( /\/$/ , '' ) ;
var linkHref = allLinks [ i ] . href . replace ( /#.*$/ , '' ) . replace ( /\/$/ , '' ) ;
// If we've already seen this page, ignore it.
// If we've already seen this page, ignore it.
if ( linkHref === "" ||
if ( linkHref === "" ||
@ -1034,7 +1034,7 @@ Readability.prototype = {
if ( uri . host !== linkHref . split ( /\/+/g ) [ 1 ] )
if ( uri . host !== linkHref . split ( /\/+/g ) [ 1 ] )
continue ;
continue ;
let linkText = this . _getInnerText ( link ) ;
var linkText = this . _getInnerText ( link ) ;
// If the linkText looks like it's not the next page, skip it.
// If the linkText looks like it's not the next page, skip it.
if ( linkText . match ( this . REGEXPS . extraneous ) || linkText . length > 25 )
if ( linkText . match ( this . REGEXPS . extraneous ) || linkText . length > 25 )
@ -1042,7 +1042,7 @@ Readability.prototype = {
// If the leftovers of the URL after removing the base URL don't contain
// If the leftovers of the URL after removing the base URL don't contain
// any digits, it's certainly not a next page link.
// any digits, it's certainly not a next page link.
let linkHrefLeftover = linkHref . replace ( articleBaseUrl , '' ) ;
var linkHrefLeftover = linkHref . replace ( articleBaseUrl , '' ) ;
if ( ! linkHrefLeftover . match ( /\d/ ) )
if ( ! linkHrefLeftover . match ( /\d/ ) )
continue ;
continue ;
@ -1052,7 +1052,7 @@ Readability.prototype = {
possiblePages [ linkHref ] . linkText += ' | ' + linkText ;
possiblePages [ linkHref ] . linkText += ' | ' + linkText ;
}
}
let linkObj = possiblePages [ linkHref ] ;
var linkObj = possiblePages [ linkHref ] ;
// If the articleBaseUrl isn't part of this URL, penalize this link. It could
// If the articleBaseUrl isn't part of this URL, penalize this link. It could
// still be the link, but the odds are lower.
// still be the link, but the odds are lower.
@ -1060,7 +1060,7 @@ Readability.prototype = {
if ( linkHref . indexOf ( articleBaseUrl ) !== 0 )
if ( linkHref . indexOf ( articleBaseUrl ) !== 0 )
linkObj . score -= 25 ;
linkObj . score -= 25 ;
let linkData = linkText + ' ' + link . className + ' ' + link . id ;
var linkData = linkText + ' ' + link . className + ' ' + link . id ;
if ( linkData . match ( this . REGEXPS . nextLink ) )
if ( linkData . match ( this . REGEXPS . nextLink ) )
linkObj . score += 50 ;
linkObj . score += 50 ;
@ -1082,12 +1082,12 @@ Readability.prototype = {
linkObj . score -= 200 ;
linkObj . score -= 200 ;
// If a parentNode contains page or paging or paginat
// If a parentNode contains page or paging or paginat
let parentNode = link . parentNode ;
var parentNode = link . parentNode ;
let positiveNodeMatch = false ;
var positiveNodeMatch = false ;
let negativeNodeMatch = false ;
var negativeNodeMatch = false ;
while ( parentNode ) {
while ( parentNode ) {
let parentNodeClassAndId = parentNode . className + ' ' + parentNode . id ;
var parentNodeClassAndId = parentNode . className + ' ' + parentNode . id ;
if ( ! positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId . match ( /pag(e|ing|inat)/i ) ) {
if ( ! positiveNodeMatch && parentNodeClassAndId && parentNodeClassAndId . match ( /pag(e|ing|inat)/i ) ) {
positiveNodeMatch = true ;
positiveNodeMatch = true ;
@ -1127,7 +1127,7 @@ Readability.prototype = {
// If the link text can be parsed as a number, give it a minor bonus, with a slight
// If the link text can be parsed as a number, give it a minor bonus, with a slight
// bias towards lower numbered pages. This is so that pages that might not have 'next'
// bias towards lower numbered pages. This is so that pages that might not have 'next'
// in their text can still get scored, and sorted properly by score.
// in their text can still get scored, and sorted properly by score.
let linkTextAsNumber = parseInt ( linkText , 10 ) ;
var linkTextAsNumber = parseInt ( linkText , 10 ) ;
if ( linkTextAsNumber ) {
if ( linkTextAsNumber ) {
// Punish 1 since we're either already there, or it's probably
// Punish 1 since we're either already there, or it's probably
// before what we want anyways.
// before what we want anyways.
@ -1142,8 +1142,8 @@ Readability.prototype = {
// Loop through all of our possible pages from above and find our top
// Loop through all of our possible pages from above and find our top
// candidate for the next page URL. Require at least a score of 50, which
// candidate for the next page URL. Require at least a score of 50, which
// is a relatively high confidence that this page is the next link.
// is a relatively high confidence that this page is the next link.
let topPage = null ;
var topPage = null ;
for ( let page in possiblePages ) {
for ( var page in possiblePages ) {
if ( possiblePages . hasOwnProperty ( page ) ) {
if ( possiblePages . hasOwnProperty ( page ) ) {
if ( possiblePages [ page ] . score >= 50 &&
if ( possiblePages [ page ] . score >= 50 &&
( ! topPage || topPage . score < possiblePages [ page ] . score ) )
( ! topPage || topPage . score < possiblePages [ page ] . score ) )
@ -1152,7 +1152,7 @@ Readability.prototype = {
}
}
if ( topPage ) {
if ( topPage ) {
let nextHref = topPage . href . replace ( /\/$/ , '' ) ;
var nextHref = topPage . href . replace ( /\/$/ , '' ) ;
this . log ( 'NEXT PAGE IS ' + nextHref ) ;
this . log ( 'NEXT PAGE IS ' + nextHref ) ;
this . _parsedPages [ nextHref ] = true ;
this . _parsedPages [ nextHref ] = true ;
@ -1169,7 +1169,7 @@ Readability.prototype = {
} ,
} ,
_ajax : function ( url , options ) {
_ajax : function ( url , options ) {
let request = new XMLHttpRequest ( ) ;
var request = new XMLHttpRequest ( ) ;
function respondToReadyState ( readyState ) {
function respondToReadyState ( readyState ) {
if ( request . readyState === 4 ) {
if ( request . readyState === 4 ) {
@ -1202,10 +1202,10 @@ Readability.prototype = {
} ,
} ,
_appendNextPage : function ( nextPageLink ) {
_appendNextPage : function ( nextPageLink ) {
let doc = this . _doc ;
var doc = this . _doc ;
this . _curPageNum += 1 ;
this . _curPageNum += 1 ;
let articlePage = doc . createElement ( "DIV" ) ;
var articlePage = doc . createElement ( "DIV" ) ;
articlePage . id = 'readability-page-' + this . _curPageNum ;
articlePage . id = 'readability-page-' + this . _curPageNum ;
articlePage . className = 'page' ;
articlePage . className = 'page' ;
articlePage . innerHTML = '<p class="page-separator" title="Page ' + this . _curPageNum + '">§</p>' ;
articlePage . innerHTML = '<p class="page-separator" title="Page ' + this . _curPageNum + '">§</p>' ;
@ -1213,7 +1213,7 @@ Readability.prototype = {
doc . getElementById ( "readability-content" ) . appendChild ( articlePage ) ;
doc . getElementById ( "readability-content" ) . appendChild ( articlePage ) ;
if ( this . _curPageNum > this . MAX _PAGES ) {
if ( this . _curPageNum > this . MAX _PAGES ) {
let nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>" ;
var nextPageMarkup = "<div style='text-align: center'><a href='" + nextPageLink + "'>View Next Page</a></div>" ;
articlePage . innerHTML = articlePage . innerHTML + nextPageMarkup ;
articlePage . innerHTML = articlePage . innerHTML + nextPageMarkup ;
return ;
return ;
}
}
@ -1225,7 +1225,7 @@ Readability.prototype = {
success : function ( r ) {
success : function ( r ) {
// First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page.
// First, check to see if we have a matching ETag in headers - if we do, this is a duplicate page.
let eTag = r . getResponseHeader ( 'ETag' ) ;
var eTag = r . getResponseHeader ( 'ETag' ) ;
if ( eTag ) {
if ( eTag ) {
if ( eTag in this . _pageETags ) {
if ( eTag in this . _pageETags ) {
this . log ( "Exact duplicate page found via ETag. Aborting." ) ;
this . log ( "Exact duplicate page found via ETag. Aborting." ) ;
@ -1237,7 +1237,7 @@ Readability.prototype = {
}
}
// TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
// TODO: this ends up doubling up page numbers on NYTimes articles. Need to generically parse those away.
let page = doc . createElement ( "DIV" ) ;
var page = doc . createElement ( "DIV" ) ;
// Do some preprocessing to our HTML to make it ready for appending.
// Do some preprocessing to our HTML to make it ready for appending.
// - Remove any script tags. Swap and reswap newlines with a unicode
// - Remove any script tags. Swap and reswap newlines with a unicode
@ -1247,7 +1247,7 @@ Readability.prototype = {
// - Turn all double br's into p's - was handled by prepDocument in the original view.
// - Turn all double br's into p's - was handled by prepDocument in the original view.
// Maybe in the future abstract out prepDocument to work for both the original document
// Maybe in the future abstract out prepDocument to work for both the original document
// and AJAX-added pages.
// and AJAX-added pages.
let responseHtml = r . responseText . replace ( /\n/g , '\uffff' ) . replace ( /<script.*?>.*?<\/script>/gi , '' ) ;
var responseHtml = r . responseText . replace ( /\n/g , '\uffff' ) . replace ( /<script.*?>.*?<\/script>/gi , '' ) ;
responseHtml = responseHtml . replace ( /\n/g , '\uffff' ) . replace ( /<script.*?>.*?<\/script>/gi , '' ) ;
responseHtml = responseHtml . replace ( /\n/g , '\uffff' ) . replace ( /<script.*?>.*?<\/script>/gi , '' ) ;
responseHtml = responseHtml . replace ( /\uffff/g , '\n' ) . replace ( /<(\/?)noscript/gi , '<$1div' ) ;
responseHtml = responseHtml . replace ( /\uffff/g , '\n' ) . replace ( /<(\/?)noscript/gi , '<$1div' ) ;
responseHtml = responseHtml . replace ( this . REGEXPS . replaceFonts , '<$1span>' ) ;
responseHtml = responseHtml . replace ( this . REGEXPS . replaceFonts , '<$1span>' ) ;
@ -1259,11 +1259,11 @@ Readability.prototype = {
// disable as necessary at the end of grabArticle.
// disable as necessary at the end of grabArticle.
this . _flags = 0x1 | 0x2 | 0x4 ;
this . _flags = 0x1 | 0x2 | 0x4 ;
let nextPageLink = this . _findNextPageLink ( page ) ;
var nextPageLink = this . _findNextPageLink ( page ) ;
// NOTE: if we end up supporting _appendNextPage(), we'll need to
// NOTE: if we end up supporting _appendNextPage(), we'll need to
// change this call to be async
// change this call to be async
let content = this . _grabArticle ( page ) ;
var content = this . _grabArticle ( page ) ;
if ( ! content ) {
if ( ! content ) {
this . log ( "No content found in page to append. Aborting." ) ;
this . log ( "No content found in page to append. Aborting." ) ;
@ -1273,10 +1273,10 @@ Readability.prototype = {
// Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
// Anti-duplicate mechanism. Essentially, get the first paragraph of our new page.
// Compare it against all of the previous documents we've gotten. If the previous
// Compare it against all of the previous documents we've gotten. If the previous
// document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
// document contains exactly the innerHTML of this first paragraph, it's probably a duplicate.
let firstP = content . getElementsByTagName ( "P" ) . length ? content . getElementsByTagName ( "P" ) [ 0 ] : null ;
var firstP = content . getElementsByTagName ( "P" ) . length ? content . getElementsByTagName ( "P" ) [ 0 ] : null ;
if ( firstP && firstP . innerHTML . length > 100 ) {
if ( firstP && firstP . innerHTML . length > 100 ) {
for ( let i = 1 ; i <= this . _curPageNum ; i += 1 ) {
for ( var i = 1 ; i <= this . _curPageNum ; i += 1 ) {
let rPage = doc . getElementById ( 'readability-page-' + i ) ;
var rPage = doc . getElementById ( 'readability-page-' + i ) ;
if ( rPage && rPage . innerHTML . indexOf ( firstP . innerHTML ) !== - 1 ) {
if ( rPage && rPage . innerHTML . indexOf ( firstP . innerHTML ) !== - 1 ) {
this . log ( 'Duplicate of page ' + i + ' - skipping.' ) ;
this . log ( 'Duplicate of page ' + i + ' - skipping.' ) ;
articlePage . style . display = 'none' ;
articlePage . style . display = 'none' ;
@ -1316,7 +1316,7 @@ Readability.prototype = {
if ( ! this . _flagIsActive ( this . FLAG _WEIGHT _CLASSES ) )
if ( ! this . _flagIsActive ( this . FLAG _WEIGHT _CLASSES ) )
return 0 ;
return 0 ;
let weight = 0 ;
var weight = 0 ;
// Look for a special classname
// Look for a special classname
if ( typeof ( e . className ) === 'string' && e . className !== '' ) {
if ( typeof ( e . className ) === 'string' && e . className !== '' ) {
@ -1348,14 +1348,14 @@ Readability.prototype = {
* @ return void
* @ return void
* * /
* * /
_clean : function ( e , tag ) {
_clean : function ( e , tag ) {
let targetList = e . getElementsByTagName ( tag ) ;
var targetList = e . getElementsByTagName ( tag ) ;
let isEmbed = ( tag === 'object' || tag === 'embed' ) ;
var isEmbed = ( tag === 'object' || tag === 'embed' ) ;
for ( let y = targetList . length - 1 ; y >= 0 ; y -= 1 ) {
for ( var y = targetList . length - 1 ; y >= 0 ; y -= 1 ) {
// Allow youtube and vimeo videos through as people usually want to see those.
// Allow youtube and vimeo videos through as people usually want to see those.
if ( isEmbed ) {
if ( isEmbed ) {
let attributeValues = "" ;
var attributeValues = "" ;
for ( let i = 0 , il = targetList [ y ] . attributes . length ; i < il ; i += 1 ) {
for ( var i = 0 , il = targetList [ y ] . attributes . length ; i < il ; i += 1 ) {
attributeValues += targetList [ y ] . attributes [ i ] . value + '|' ;
attributeValues += targetList [ y ] . attributes [ i ] . value + '|' ;
}
}
@ -1382,17 +1382,17 @@ Readability.prototype = {
if ( ! this . _flagIsActive ( this . FLAG _CLEAN _CONDITIONALLY ) )
if ( ! this . _flagIsActive ( this . FLAG _CLEAN _CONDITIONALLY ) )
return ;
return ;
let tagsList = e . getElementsByTagName ( tag ) ;
var tagsList = e . getElementsByTagName ( tag ) ;
let curTagsLength = tagsList . length ;
var curTagsLength = tagsList . length ;
// Gather counts for other typical elements embedded within.
// Gather counts for other typical elements embedded within.
// Traverse backwards so we can remove nodes at the same time
// Traverse backwards so we can remove nodes at the same time
// without affecting the traversal.
// without affecting the traversal.
//
//
// TODO: Consider taking into account original contentScore here.
// TODO: Consider taking into account original contentScore here.
for ( let i = curTagsLength - 1 ; i >= 0 ; i -= 1 ) {
for ( var i = curTagsLength - 1 ; i >= 0 ; i -= 1 ) {
let weight = this . _getClassWeight ( tagsList [ i ] ) ;
var weight = this . _getClassWeight ( tagsList [ i ] ) ;
let contentScore = 0 ;
var contentScore = 0 ;
this . log ( "Cleaning Conditionally " + tagsList [ i ] + " (" + tagsList [ i ] . className + ":" + tagsList [ i ] . id + ")" ) ;
this . log ( "Cleaning Conditionally " + tagsList [ i ] + " (" + tagsList [ i ] . className + ":" + tagsList [ i ] . id + ")" ) ;
@ -1402,21 +1402,21 @@ Readability.prototype = {
// If there are not very many commas, and the number of
// If there are not very many commas, and the number of
// non-paragraph elements is more than paragraphs or other
// non-paragraph elements is more than paragraphs or other
// ominous signs, remove the element.
// ominous signs, remove the element.
let p = tagsList [ i ] . getElementsByTagName ( "p" ) . length ;
var p = tagsList [ i ] . getElementsByTagName ( "p" ) . length ;
let img = tagsList [ i ] . getElementsByTagName ( "img" ) . length ;
var img = tagsList [ i ] . getElementsByTagName ( "img" ) . length ;
let li = tagsList [ i ] . getElementsByTagName ( "li" ) . length - 100 ;
var li = tagsList [ i ] . getElementsByTagName ( "li" ) . length - 100 ;
let input = tagsList [ i ] . getElementsByTagName ( "input" ) . length ;
var input = tagsList [ i ] . getElementsByTagName ( "input" ) . length ;
let embedCount = 0 ;
var embedCount = 0 ;
let embeds = tagsList [ i ] . getElementsByTagName ( "embed" ) ;
var embeds = tagsList [ i ] . getElementsByTagName ( "embed" ) ;
for ( let ei = 0 , il = embeds . length ; ei < il ; ei += 1 ) {
for ( var ei = 0 , il = embeds . length ; ei < il ; ei += 1 ) {
if ( embeds [ ei ] . src . search ( this . REGEXPS . videos ) === - 1 )
if ( embeds [ ei ] . src . search ( this . REGEXPS . videos ) === - 1 )
embedCount += 1 ;
embedCount += 1 ;
}
}
let linkDensity = this . _getLinkDensity ( tagsList [ i ] ) ;
var linkDensity = this . _getLinkDensity ( tagsList [ i ] ) ;
let contentLength = this . _getInnerText ( tagsList [ i ] ) . length ;
var contentLength = this . _getInnerText ( tagsList [ i ] ) . length ;
let toRemove = false ;
var toRemove = false ;
if ( img > p ) {
if ( img > p ) {
toRemove = true ;
toRemove = true ;
@ -1447,9 +1447,9 @@ Readability.prototype = {
* @ return void
* @ return void
* * /
* * /
_cleanHeaders : function ( e ) {
_cleanHeaders : function ( e ) {
for ( let headerIndex = 1 ; headerIndex < 3 ; headerIndex += 1 ) {
for ( var headerIndex = 1 ; headerIndex < 3 ; headerIndex += 1 ) {
let headers = e . getElementsByTagName ( 'h' + headerIndex ) ;
var headers = e . getElementsByTagName ( 'h' + headerIndex ) ;
for ( let i = headers . length - 1 ; i >= 0 ; i -= 1 ) {
for ( var i = headers . length - 1 ; i >= 0 ; i -= 1 ) {
if ( this . _getClassWeight ( headers [ i ] ) < 0 || this . _getLinkDensity ( headers [ i ] ) > 0.33 )
if ( this . _getClassWeight ( headers [ i ] ) < 0 || this . _getLinkDensity ( headers [ i ] ) > 0.33 )
headers [ i ] . parentNode . removeChild ( headers [ i ] ) ;
headers [ i ] . parentNode . removeChild ( headers [ i ] ) ;
}
}
@ -1492,12 +1492,12 @@ Readability.prototype = {
// this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
// this._parsedPages[uri.spec.replace(/\/$/, '')] = true;
// Pull out any possible next page link first.
// Pull out any possible next page link first.
// let nextPageLink = this._findNextPageLink(doc.body);
// var nextPageLink = this._findNextPageLink(doc.body);
this . _prepDocument ( ) ;
this . _prepDocument ( ) ;
let articleTitle = this . _getArticleTitle ( ) ;
var articleTitle = this . _getArticleTitle ( ) ;
let articleContent = this . _grabArticle ( ) ;
var articleContent = this . _grabArticle ( ) ;
if ( ! articleContent )
if ( ! articleContent )
return null ;
return null ;
@ -1511,7 +1511,7 @@ Readability.prototype = {
// }).bind(this), 500);
// }).bind(this), 500);
// }
// }
let excerpt = this . _getExcerpt ( articleContent ) ;
var excerpt = this . _getExcerpt ( articleContent ) ;
return { title : articleTitle ,
return { title : articleTitle ,
byline : this . _articleByline ,
byline : this . _articleByline ,