Update to latest JSDOM

6 years ago · 512e1c18a7
parent 977be42d1f
commit 512e1c18a7
5 changed files with 39 additions and 31 deletions
--- a/README.md
+++ b/README.md
@@ -12,9 +12,9 @@ To test local changes to Readability.js, you can use the [automated tests](#test

 Please make sure to run [eslint](http://eslint.org/) against any proposed changes when creating a pull request.

-## Usage
+## Usage on the web

-To parse a document, you must create a new `Readability` object from a document object, and then call `parse()`. Here's an example:
+To parse a document, you must create a new `Readability` object from a DOM document object, and then call `parse()`. Here's an example:

 ```javascript
 var article = new Readability(document).parse();
@@ -29,22 +29,41 @@ This `article` object will contain the following properties:
 * `byline`: author metadata
 * `dir`: content direction

-If you're using Readability on the web, you will likely be able to use a `document` reference from elsewhere (e.g. fetched via XMLHttpRequest, in a same-origin `<iframe>` you have access to, etc.).
-
-Otherwise, you would need to construct such an object using a DOM parser such as [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do not recommend it for general use.
-
-If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page) scripts (avoid fetching remote resources etc.) as well as passing it the page's URI as the `url` property of the `options` object you pass the `JSDOM` constructor.
+If you're using Readability on the web, you will likely be able to use a `document` reference
+from elsewhere (e.g. fetched via XMLHttpRequest, in a same-origin `<iframe>` you have access to, etc.).

 ### Optional

-Readability's `parse()` works by modifying the DOM. This removes some elements in the web page. You could avoid this by passing the clone of the `document` object while creating a `Readability` object.
-
+Readability's `parse()` works by modifying the DOM, which removes some elements from the web page.
+You can avoid this by passing a clone of the `document` object when creating the `Readability` object.

 ```
 var documentClone = document.cloneNode(true); 
 var article = new Readability(documentClone).parse();
 ```

+## Usage from node.js
+
+In node.js, you won't generally have a DOM document object. To obtain one, you can use external
+libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of
+its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do
+not recommend it for general use.
+
+If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page)
+scripts or fetch remote resources, and you should pass the page's URI as the `url` property of
+the `options` object you pass to the `JSDOM` constructor.
+
+### Example:
+
+```
+var JSDOM = require('jsdom').JSDOM;
+var doc = new JSDOM("<body>Here's a bunch of text</body>", {
+  url: "https://www.example.com/the-page-i-got-the-source-from",
+});
+let reader = new Readability(doc.window.document);
+let article = reader.parse();
+```
+
 ## What's Readability-readerable?

 It's a quick-and-dirty way of figuring out if it's plausible that the contents of a given
--- a/package.json
+++ b/package.json
@@ -28,7 +28,7 @@
    "eslint": ">=4.2",
    "htmltidy2": "^0.3.0",
    "js-beautify": "^1.5.5",
-    "jsdom": "^7.0",
+    "jsdom": "^13.1",
    "matcha": "^0.6.0",
    "mocha": "^2.2.*"
  }
--- a/test/generate-testcase.js
+++ b/test/generate-testcase.js
@@ -2,7 +2,7 @@ var debug = false;

 var path = require("path");
 var fs = require("fs");
-var jsdom = require("jsdom").jsdom;
+var JSDOM = require("jsdom").JSDOM;
 var prettyPrint = require("./utils").prettyPrint;
 var serializeDocument = require("jsdom").serializeDocument;
 var http = require("http");
@@ -81,7 +81,7 @@ function fetchSource(url, callbackFn) {
 }

 function sanitizeSource(html, callbackFn) {
-  htmltidy(serializeDocument(jsdom(html)), {
+  htmltidy(serializeDocument(new JSDOM(html)), {
    "indent": true,
    "indent-spaces": 4,
    "numeric-entities": true,
@@ -128,11 +128,8 @@ function runReadability(source, destPath, metadataDestPath) {
  }
  // Use jsdom for isProbablyReaderable because it supports querySelectorAll
  try {
-    var jsdomDoc = jsdom(source, {
-      features: {
-        FetchExternalResources: false,
-        ProcessExternalResources: false
-      }
+    var jsdomDoc = new JSDOM(source, {
+      url: uri,
    });
    myReader = new Readability(jsdomDoc);
    readerable = myReader.isProbablyReaderable();
--- a/test/test-isProbablyReaderable.js
+++ b/test/test-isProbablyReaderable.js
@@ -1,4 +1,4 @@
-var jsdom = require("jsdom").jsdom;
+var JSDOM = require("jsdom").JSDOM;
 var chai = require("chai");
 chai.config.includeStack = true;
 var expect = chai.expect;
@@ -10,13 +10,9 @@ describe("isProbablyReaderable - test pages", function() {
  testPages.forEach(function(testPage) {
    var uri = "http://fakehost/test/page.html";
    describe(testPage.dir, function() {
-      var doc = jsdom(testPage.source, {
+      var doc = new JSDOM(testPage.source, {
        url: uri,
-        features: {
-          FetchExternalResources: false,
-          ProcessExternalResources: false,
-        },
-      });
+      }).window.document;
      var expected = testPage.expectedMetadata.readerable;
      it("The result should " + (expected ? "" : "not ") + "be readerable", function() {
        expect(readabilityCheck.isProbablyReaderable(doc)).eql(expected);
--- a/test/test-readability.js
+++ b/test/test-readability.js
@@ -1,4 +1,4 @@
-var jsdom = require("jsdom").jsdom;
+var JSDOM = require("jsdom").JSDOM;
 var chai = require("chai");
 chai.config.includeStack = true;
 var expect = chai.expect;
@@ -219,13 +219,9 @@ describe("Test pages", function() {
      var uri = "http://fakehost/test/page.html";

      runTestsWithItems("jsdom", function(source) {
-        var doc = jsdom(source, {
+        var doc = new JSDOM(source, {
          url: uri,
-          features: {
-            FetchExternalResources: false,
-            ProcessExternalResources: false
-          }
-        });
+        }).window.document;
        removeCommentNodesRecursively(doc);
        return doc;
      }, testPage.source, testPage.expectedContent, testPage.expectedMetadata);