fresh run of prettier; remove NOTES.md (#233)

5 years ago · 663cc45bf4
parent 244d17ddd3
commit 663cc45bf4
17 changed files with 92 additions and 165 deletions
--- a/NOTES.md
+++ b/NOTES.md
@ -1,84 +0,0 @@
-Each extractor should ultimately be an object that exports like so:
-
-```javascript
-import GenericContentExtractor from './content/extractor'
-import GenericTitleExtractor from './title/extractor'
-import GenericAuthorExtractor from './author/extractor'
-import GenericDatePublishedExtractor from './date-published/extractor'
-import GenericDekExtractor from './dek/extractor'
-import GenericLeadImageUrlExtractor from './lead-image-url/extractor'
-
-const GenericExtractor = {
-  content: GenericContentExtractor,
-  title: GenericTitleExtractor,
-  author: GenericAuthorExtractor,
-  datePublished: GenericDatePublishedExtractor,
-  dek: GenericDekExtractor,
-  leadImageUrl: GenericLeadImageUrlExtractor,
-}
-```
-
-Custom parsers can then be merged with the generic parser to fill in gaps in their implementations. E.g.:
-
-```javascript
-import NYMagContentExtractor from '...'
-import NYMagTitleExtractor from '...'
-
-const NYMagExtractor = {
-  content: NYMagContentExtractor,
-  title: NYMagTitleExtractor,
-}
-
-const Extractor = {
-  ...GenericExtractor,
-  ...NYMagExtractor
-}
-
-```
-
-# Declarative Custom Extractors
-
-My goal is to create declarative extractors that describe what rather than how. So, for example:
-
-```javascript
-NYMagExtractor = {
-  content: {
-    // Order by most likely. Extractor will stop on first occurrence
-    selectors: [
-      'div.article-content',
-      'section.body',
-      'article.article',
-    ],
-
-    // Selectors to remove from the extracted content
-    clean: [
-      '.ad',
-    ],
-
-    // Array of transformations to make on matched elements
-    // Each item in the array is an object. The key is the
-    // selector, the value is a transformation function
-    // for the matching node.
-    transforms: [
-      // Convert h1s to h2s
-      {
-        'h1': ($node) => convertNodeTo($node, $, 'h2')
-      },
-
-      // Convert lazy-loaded noscript images to figures
-      {
-        'noscript': ($node) => {
-          const $children = $node.children()
-          if ($children.length === 1 && $children.get(0).tagName === 'img') {
-            convertNodeTo($node, $, 'figure')
-          }
-        }
-      }
-    ]
-  },
-
-  title: [
-    'h1',
-  ]
-}
-```
--- a/scripts/templates/custom-extractor.js
+++ b/scripts/templates/custom-extractor.js
@ -1,6 +1,6 @@
 import template from './index';

-export default function (hostname, name) {
+export default function(hostname, name) {
  return template`
    export const ${name} = {
      domain: '${hostname}',
--- a/scripts/templates/index.js
+++ b/scripts/templates/index.js
@ -13,9 +13,10 @@ export default function template(strings, ...values) {
    indentLevel = /^\s{0,2}(.+)$/g;
  }

-  return body.split('\n')
+  return body
+    .split('\n')
    .slice(1)
-    .map((line) => {
+    .map(line => {
      line = line.replace(indentLevel, '$1');

      if (trailingWhitespace.test(line)) {
--- a/src/cleaners/date-published.test.js
+++ b/src/cleaners/date-published.test.js
@ -1,9 +1,7 @@
 import assert from 'assert';
 import moment from 'moment-timezone';

-import cleanDatePublished, {
-  cleanDateString,
-} from './date-published';
+import cleanDatePublished, { cleanDateString } from './date-published';

 describe('cleanDatePublished(dateString)', () => {
  it('returns a date', () => {
--- a/src/extractors/custom/www.fastcompany.com/index.js
+++ b/src/extractors/custom/www.fastcompany.com/index.js
@ -2,38 +2,26 @@ export const WwwFastcompanyComExtractor = {
  domain: 'www.fastcompany.com',

  title: {
-    selectors: [
-      'h1',
-    ],
+    selectors: ['h1'],
  },

  author: {
-    selectors: [
-      '.post__by',
-    ],
+    selectors: ['.post__by'],
  },

  date_published: {
-    selectors: [
-      ['meta[name="article:published_time"]', 'value'],
-    ],
+    selectors: [['meta[name="article:published_time"]', 'value']],
  },

  dek: {
-    selectors: [
-      '.post__deck',
-    ],
+    selectors: ['.post__deck'],
  },

  lead_image_url: {
-    selectors: [
-      ['meta[name="og:image"]', 'value'],
-    ],
+    selectors: [['meta[name="og:image"]', 'value']],
  },

  content: {
-    selectors: [
-      '.post__article',
-    ],
+    selectors: ['.post__article'],
  },
 };
--- a/src/extractors/custom/www.fastcompany.com/index.test.js
+++ b/src/extractors/custom/www.fastcompany.com/index.test.js
@ -15,10 +15,10 @@ describe('WwwFastcompanyComExtractor', () => {
    beforeAll(() => {
      url =
        'https://www.fastcompany.com/3067012/the-only-five-email-folders-your-inbox-will-ever-need';
-      const html =
-        fs.readFileSync('./fixtures/www.fastcompany.com/1547124373499.html');
-      result =
-        Mercury.parse(url, html, { fallback: false });
+      const html = fs.readFileSync(
+        './fixtures/www.fastcompany.com/1547124373499.html'
+      );
+      result = Mercury.parse(url, html, { fallback: false });
    });

    it('is selected properly', () => {
@ -30,53 +30,62 @@ describe('WwwFastcompanyComExtractor', () => {
    });

    it('returns the title', async () => {
-    // To pass this test, fill out the title selector
-    // in ./src/extractors/custom/www.fastcompany.com/index.js.
+      // To pass this test, fill out the title selector
+      // in ./src/extractors/custom/www.fastcompany.com/index.js.
      const { title } = await result;

-    // Update these values with the expected values from
-    // the article.
-      assert.equal(title, 'The Only Five Email Folders Your Inbox Will Ever Need');
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        title,
+        'The Only Five Email Folders Your Inbox Will Ever Need'
+      );
    });

    it('returns the author', async () => {
-    // To pass this test, fill out the author selector
-    // in ./src/extractors/custom/www.fastcompany.com/index.js.
+      // To pass this test, fill out the author selector
+      // in ./src/extractors/custom/www.fastcompany.com/index.js.
      const { author } = await result;

-    // Update these values with the expected values from
-    // the article.
+      // Update these values with the expected values from
+      // the article.
      assert.equal(author, 'Zach Hanlon');
    });

    it('returns the date_published', async () => {
-    // To pass this test, fill out the date_published selector
-    // in ./src/extractors/custom/www.fastcompany.com/index.js.
+      // To pass this test, fill out the date_published selector
+      // in ./src/extractors/custom/www.fastcompany.com/index.js.
      const { date_published } = await result;

-    // Update these values with the expected values from
-    // the article.
+      // Update these values with the expected values from
+      // the article.
      assert.equal(date_published, '2017-01-09T05:00:00.000Z');
    });

    it('returns the dek', async () => {
-    // To pass this test, fill out the dek selector
-    // in ./src/extractors/custom/www.fastcompany.com/index.js.
+      // To pass this test, fill out the dek selector
+      // in ./src/extractors/custom/www.fastcompany.com/index.js.
      const { dek } = await result;

-    // Update these values with the expected values from
-    // the article.
-      assert.equal(dek, 'Stop “organizing” your emails by subject and start thinking of them in terms of deadlines.');
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        dek,
+        'Stop “organizing” your emails by subject and start thinking of them in terms of deadlines.'
+      );
    });

    it('returns the lead_image_url', async () => {
-    // To pass this test, fill out the lead_image_url selector
-    // in ./src/extractors/custom/www.fastcompany.com/index.js.
+      // To pass this test, fill out the lead_image_url selector
+      // in ./src/extractors/custom/www.fastcompany.com/index.js.
      const { lead_image_url } = await result;

-    // Update these values with the expected values from
-    // the article.
-      assert.equal(lead_image_url, 'https://images.fastcompany.net/image/upload/w_1280,f_auto,q_auto,fl_lossy/fc/3067012-poster-p-1-the-only-five-email-folders-your-inbox-will-ever-need.jpg');
+      // Update these values with the expected values from
+      // the article.
+      assert.equal(
+        lead_image_url,
+        'https://images.fastcompany.net/image/upload/w_1280,f_auto,q_auto,fl_lossy/fc/3067012-poster-p-1-the-only-five-email-folders-your-inbox-will-ever-need.jpg'
+      );
    });

    it('returns the content', async () => {
@ -88,11 +97,19 @@ describe('WwwFastcompanyComExtractor', () => {

      const $ = cheerio.load(content || '');

-      const first13 = excerptContent($('*').first().text(), 13);
+      const first13 = excerptContent(
+        $('*')
+          .first()
+          .text(),
+        13
+      );

      // Update these values with the expected values from
      // the article.
-      assert.equal(first13, 'For years, my approach to email was like slaying a hydra. For every');
+      assert.equal(
+        first13,
+        'For years, my approach to email was like slaying a hydra. For every'
+      );
    });
  });
 });
--- a/src/extractors/custom/www.fortinet.com/index.js
+++ b/src/extractors/custom/www.fortinet.com/index.js
@ -18,10 +18,12 @@ export const WwwFortinetComExtractor = {
  },

  content: {
-    selectors: ['div.responsivegrid.aem-GridColumn.aem-GridColumn--default--12'],
+    selectors: [
+      'div.responsivegrid.aem-GridColumn.aem-GridColumn--default--12',
+    ],

    transforms: {
-      noscript: ($node) => {
+      noscript: $node => {
        const $children = $node.children();
        if ($children.length === 1 && $children.get(0).tagName === 'img') {
          return 'figure';
--- a/src/extractors/custom/www.fortinet.com/index.test.js
+++ b/src/extractors/custom/www.fortinet.com/index.test.js
@ -15,7 +15,9 @@ describe('WwwFortinetComExtractor', () => {
    beforeAll(() => {
      url =
        'https://www.fortinet.com/blog/threat-research/defeating-an-android-packer-with-frida.html';
-      const html = fs.readFileSync('./fixtures/www.fortinet.com/1546954846985.html');
+      const html = fs.readFileSync(
+        './fixtures/www.fortinet.com/1546954846985.html'
+      );
      result = Mercury.parse(url, html, { fallback: false });
    });

@ -34,7 +36,10 @@ describe('WwwFortinetComExtractor', () => {

      // Update these values with the expected values from
      // the article.
-      assert.equal(title, 'How-to Guide: Defeating an Android Packer with FRIDA');
+      assert.equal(
+        title,
+        'How-to Guide: Defeating an Android Packer with FRIDA'
+      );
    });

    it('returns the author', async () => {
--- a/src/extractors/custom/www.washingtonpost.com/index.js
+++ b/src/extractors/custom/www.washingtonpost.com/index.js
@ -6,15 +6,11 @@ export const WwwWashingtonpostComExtractor = {
  },

  author: {
-    selectors: [
-      '.pb-author-name',
-    ],
+    selectors: ['.pb-author-name'],
  },

  date_published: {
-    selectors: [
-      ['.author-timestamp[itemprop="datePublished"]', 'content'],
-    ],
+    selectors: [['.author-timestamp[itemprop="datePublished"]', 'content']],
  },

  dek: {
--- a/src/extractors/custom/www.washingtonpost.com/index.test.js
+++ b/src/extractors/custom/www.washingtonpost.com/index.test.js
@ -15,10 +15,10 @@ describe('WwwWashingtonpostComExtractor', () => {
    beforeAll(() => {
      url =
        'https://www.washingtonpost.com/news/opinions/wp/2018/10/29/enough-platitudes-lets-name-names/';
-      const html =
-        fs.readFileSync('./fixtures/www.washingtonpost.com/1546958901450.html');
-      result =
-        Mercury.parse(url, html, { fallback: false });
+      const html = fs.readFileSync(
+        './fixtures/www.washingtonpost.com/1546958901450.html'
+      );
+      result = Mercury.parse(url, html, { fallback: false });
    });

    it('is selected properly', async () => {
--- a/src/extractors/detect-by-html.js
+++ b/src/extractors/detect-by-html.js
@ -1,4 +1,4 @@
-import { MediumExtractor, BloggerExtractor } from "./custom";
+import { MediumExtractor, BloggerExtractor } from './custom';

 const Detectors = {
  'meta[name="al:ios:app_name"][value="Medium"]': MediumExtractor,
--- a/src/extractors/generic/content/scoring/merge-siblings.js
+++ b/src/extractors/generic/content/scoring/merge-siblings.js
@ -57,13 +57,15 @@ export default function mergeSiblings($candidate, topScore, $) {

          if (newScore >= siblingScoreThreshold) {
            return wrappingDiv.append($sibling);
-          } if (sibling.tagName === 'p') {
+          }
+          if (sibling.tagName === 'p') {
            const siblingContent = $sibling.text();
            const siblingContentLength = textLength(siblingContent);

            if (siblingContentLength > 80 && density < 0.25) {
              return wrappingDiv.append($sibling);
-            } if (
+            }
+            if (
              siblingContentLength <= 80 &&
              density === 0 &&
              hasSentenceEnd(siblingContent)
--- a/src/extractors/generic/content/scoring/score-node.js
+++ b/src/extractors/generic/content/scoring/score-node.js
@ -15,13 +15,17 @@ export default function scoreNode($node) {
  // Could save doing that regex test on every node – AP
  if (PARAGRAPH_SCORE_TAGS.test(tagName)) {
    return scoreParagraph($node);
-  } if (tagName.toLowerCase() === 'div') {
+  }
+  if (tagName.toLowerCase() === 'div') {
    return 5;
-  } if (CHILD_CONTENT_TAGS.test(tagName)) {
+  }
+  if (CHILD_CONTENT_TAGS.test(tagName)) {
    return 3;
-  } if (BAD_TAGS.test(tagName)) {
+  }
+  if (BAD_TAGS.test(tagName)) {
    return -3;
-  } if (tagName.toLowerCase() === 'th') {
+  }
+  if (tagName.toLowerCase() === 'th') {
    return -5;
  }

--- a/src/resource/utils/fetch-resource.test.js
+++ b/src/resource/utils/fetch-resource.test.js
@ -2,10 +2,7 @@ import assert from 'assert';
 import URL from 'url';

 import { record } from 'test-helpers';
-import fetchResource, {
-  baseDomain,
-  validateResponse,
-} from './fetch-resource';
+import fetchResource, { baseDomain, validateResponse } from './fetch-resource';
 import { MAX_CONTENT_LENGTH } from './constants';

 describe('fetchResource(url)', () => {
--- a/src/utils/dom/clean-tags.js
+++ b/src/utils/dom/clean-tags.js
@ -77,7 +77,6 @@ function removeUnlessContent($node, $, weight) {
    // Too many script tags, not enough content.
    if (scriptCount > 0 && contentLength < 150) {
      $node.remove();
-      
    }
  }
 }
--- a/src/utils/dom/link-density.js
+++ b/src/utils/dom/link-density.js
@ -13,7 +13,8 @@ export function linkDensity($node) {

  if (totalTextLength > 0) {
    return linkLength / totalTextLength;
-  } if (totalTextLength === 0 && linkLength > 0) {
+  }
+  if (totalTextLength === 0 && linkLength > 0) {
    return 1;
  }

--- a/src/utils/dom/strip-unlikely-candidates.js
+++ b/src/utils/dom/strip-unlikely-candidates.js
@ -21,7 +21,8 @@ export default function stripUnlikelyCandidates($) {
      const classAndId = `${classes || ''} ${id || ''}`;
      if (CANDIDATES_WHITELIST.test(classAndId)) {
        return;
-      } if (CANDIDATES_BLACKLIST.test(classAndId)) {
+      }
+      if (CANDIDATES_BLACKLIST.test(classAndId)) {
        $node.remove();
      }
    });