chore: remove logic for fetching meta tags with custom attrs (resource

normalizes this now)
2024-11-17 03:25:31 +00:00 · 2016-09-09 13:56:06 -04:00 · 2016-09-09 13:56:06 -04:00 · 7b97559778
commit 7b97559778
parent c48e3485c0
6 changed files with 8 additions and 34 deletions
--- a/TODO.md
+++ b/TODO.md
@ -1,5 +1,4 @@
 TODO:
 - remove logic for fetching meta attrs with custom props
 - extractNextPageUrl
 - Rename all cleaners from cleanThing to clean
 - Make sure weightNodes flag is being passed properly
@ -10,6 +9,7 @@ TODO:
 - Separate constants into activity-specific folders (dom, scoring)
 DONE:
 x remove logic for fetching meta attrs with custom props
 x cleaning embed and object nodes
 x run makeLinksAbsolute on extracted content before returning
 x add option to fetch attrs in RootExtractor's select method
--- a/src/extractors/generic/index.test.js
+++ b/src/extractors/generic/index.test.js
@ -15,7 +15,6 @@ describe('GenericExtractor', () => {
        author,
        datePublished,
        dek,
        leadImageUrl,
      } = GenericExtractor.extract(
        { url: "http://latimes.com", html, metaCache: [] }
      )
@ -30,7 +29,6 @@ describe('GenericExtractor', () => {
        '2009-10-14T04:00:00.000Z'
      )
      assert.equal(dek, null)
      assert.equal(leadImageUrl, 'http://latimesblogs.latimes.com/fb.jpg')
    })
    it("extracts html and returns the article title", () => {
@ -41,7 +39,6 @@ describe('GenericExtractor', () => {
        title,
        datePublished,
        dek,
        leadImageUrl,
      } = GenericExtractor.extract(
        { url: "http://wired.com", html, metaCache: [] }
      )
@ -53,7 +50,6 @@ describe('GenericExtractor', () => {
      )
      assert.equal(datePublished, null)
      assert.equal(dek, null)
      assert.equal(leadImageUrl, 'https://www.wired.com/wp-content/uploads/2016/08/GettyImages-536814811-1200x630-e1471497753973.jpg')
    })
  })
--- a/src/extractors/generic/lead-image-url/constants.js
+++ b/src/extractors/generic/lead-image-url/constants.js
@ -2,8 +2,8 @@
 // All attributes should be lowercase for faster case-insensitive matching.
 // From most distinct to least distinct.
 export const LEAD_IMAGE_URL_META_TAGS = [
-    ['og:image', 'property', 'content'],
+    'og:image',
-    ['twitter:image', 'name', 'content'],
+    'twitter:image',
    'image_src',
 ]
--- a/src/extractors/generic/lead-image-url/fixtures/html.js
+++ b/src/extractors/generic/lead-image-url/fixtures/html.js
@ -3,7 +3,7 @@ const HTML = {
    test: `
      <html>
        <head>
-          <meta property="og:image" content="http://example.com/lead.jpg">
+          <meta name="og:image" value="http://example.com/lead.jpg">
        </head>
      </html>
    `,
@ -13,7 +13,7 @@ const HTML = {
    test: `
      <html>
        <head>
-          <meta name="twitter:image" content="http://example.com/lead.jpg">
+          <meta name="twitter:image" value="http://example.com/lead.jpg">
        </head>
      </html>
    `,
--- a/src/utils/dom/extract-from-meta.js
+++ b/src/utils/dom/extract-from-meta.js
@ -15,26 +15,14 @@ export default function extractFromMeta(
  cleanTags=true,
 ) {
  const foundNames = metaNames.filter(name => {
-    const metaType = typeof name
+    return cachedNames.indexOf(name) !== -1
    if (metaType === 'string') {
      return cachedNames.indexOf(name) !== -1
    } else if (metaType === 'object') {
      return cachedNames.indexOf(name[0]) !== 1
    }
  })
  for (let name of foundNames) {
    let type, value
-    if (typeof name === 'string') {
+    type = 'name'
-      type = 'name'
+    value = 'value'
      value = 'value'
    } else {
      type = name[1]
      value = name[2]
      name = name[0]
    }
    const nodes = $(`meta[${type}="${name}"]`)
--- a/src/utils/dom/extract-from-meta.test.js
+++ b/src/utils/dom/extract-from-meta.test.js
@ -32,16 +32,6 @@ describe('extractFromMeta($, metaNames, cachedNames, cleanTags)', () => {
    assert.equal(result, HTML.metaEmptyDupes.result)
  })
  it('accepts custom attributes', () => {
    const $ = cheerio.load(HTML.custom.test)
    const metaNames = [['foo', 'property', 'content']]
    const cachedNames = ['foo']
    const result = extractFromMeta(
      $, metaNames, cachedNames
    )
    assert.equal(result, HTML.custom.result)
  })
 })