|
|
@ -11,26 +11,26 @@ import {
|
|
|
|
} from '../utils'
|
|
|
|
} from '../utils'
|
|
|
|
|
|
|
|
|
|
|
|
const GenericTitleExtractor = {
|
|
|
|
const GenericTitleExtractor = {
|
|
|
|
extract($, cachedMeta) {
|
|
|
|
extract($, url, cachedMeta) {
|
|
|
|
// First, check to see if we have a matching meta tag that we can make
|
|
|
|
// First, check to see if we have a matching meta tag that we can make
|
|
|
|
// use of that is strongly associated with the headline.
|
|
|
|
// use of that is strongly associated with the headline.
|
|
|
|
let title
|
|
|
|
let title
|
|
|
|
|
|
|
|
|
|
|
|
title = extractFromMeta($, STRONG_TITLE_META_TAGS, cachedMeta)
|
|
|
|
title = extractFromMeta($, STRONG_TITLE_META_TAGS, cachedMeta)
|
|
|
|
if (title) return cleanTitle(title)
|
|
|
|
if (title) return cleanTitle(title, url, $)
|
|
|
|
|
|
|
|
|
|
|
|
// Second, look through our content selectors for the most likely
|
|
|
|
// Second, look through our content selectors for the most likely
|
|
|
|
// article title that is strongly associated with the headline.
|
|
|
|
// article title that is strongly associated with the headline.
|
|
|
|
title = extractFromSelectors($, STRONG_TITLE_SELECTORS)
|
|
|
|
title = extractFromSelectors($, STRONG_TITLE_SELECTORS)
|
|
|
|
if (title) return cleanTitle(title)
|
|
|
|
if (title) return cleanTitle(title, url, $)
|
|
|
|
|
|
|
|
|
|
|
|
// Third, check for weaker meta tags that may match.
|
|
|
|
// Third, check for weaker meta tags that may match.
|
|
|
|
title = extractFromMeta($, WEAK_TITLE_META_TAGS, cachedMeta)
|
|
|
|
title = extractFromMeta($, WEAK_TITLE_META_TAGS, cachedMeta)
|
|
|
|
if (title) return cleanTitle(title)
|
|
|
|
if (title) return cleanTitle(title, url, $)
|
|
|
|
|
|
|
|
|
|
|
|
// Last, look for weaker selector tags that may match.
|
|
|
|
// Last, look for weaker selector tags that may match.
|
|
|
|
title = extractFromSelectors($, WEAK_TITLE_SELECTORS)
|
|
|
|
title = extractFromSelectors($, WEAK_TITLE_SELECTORS)
|
|
|
|
if (title) return cleanTitle(title)
|
|
|
|
if (title) return cleanTitle(title, url, $)
|
|
|
|
|
|
|
|
|
|
|
|
// If no matches, return an empty string
|
|
|
|
// If no matches, return an empty string
|
|
|
|
return ""
|
|
|
|
return ""
|
|
|
|