Update to latest JSDOM

pull/509/head
Gijs Kruitbosch 6 years ago committed by Gijs
parent 977be42d1f
commit 512e1c18a7

@ -12,9 +12,9 @@ To test local changes to Readability.js, you can use the [automated tests](#test
Please make sure to run [eslint](http://eslint.org/) against any proposed changes when creating a pull request.
## Usage
## Usage on the web
To parse a document, you must create a new `Readability` object from a document object, and then call `parse()`. Here's an example:
To parse a document, you must create a new `Readability` object from a DOM document object, and then call `parse()`. Here's an example:
```javascript
var article = new Readability(document).parse();
@ -29,22 +29,41 @@ This `article` object will contain the following properties:
* `byline`: author metadata
* `dir`: content direction
If you're using Readability on the web, you will likely be able to use a `document` reference from elsewhere (e.g. fetched via XMLHttpRequest, in a same-origin `<iframe>` you have access to, etc.).
Otherwise, you would need to construct such an object using a DOM parser such as [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do not recommend it for general use.
If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page) scripts (avoid fetching remote resources etc.) as well as passing it the page's URI as the `url` property of the `options` object you pass the `JSDOM` constructor.
If you're using Readability on the web, you will likely be able to use a `document` reference
from elsewhere (e.g. fetched via XMLHttpRequest, in a same-origin `<iframe>` you have access to, etc.).
### Optional
Readability's `parse()` works by modifying the DOM. This removes some elements in the web page. You could avoid this by passing the clone of the `document` object while creating a `Readability` object.
Readability's `parse()` works by modifying the DOM. This removes some elements in the web page.
You could avoid this by passing a clone of the `document` object when creating a `Readability` object.
```
var documentClone = document.cloneNode(true);
var article = new Readability(documentClone).parse();
```
## Usage from node.js
In node.js, you won't generally have a DOM document object. To obtain one, you can use external
libraries like [jsdom](https://github.com/tmpvar/jsdom). While this repository contains a parser of
its own (`JSDOMParser`), that is restricted to reading XML-compatible markup and therefore we do
not recommend it for general use.
If you're using `jsdom` to create a DOM object, you should ensure that the page doesn't run (page)
scripts (avoid fetching remote resources etc.), and pass the page's URI as the `url` property of
the `options` object you give to the `JSDOM` constructor.
### Example:
```
// Build a DOM with jsdom. The `url` option tells jsdom which address the
// markup came from.
var JSDOM = require('jsdom').JSDOM;
var doc = new JSDOM("<body>Here's a bunch of text</body>", {
  url: "https://www.example.com/the-page-i-got-the-source-from",
});
// Readability needs a DOM document object, not the JSDOM wrapper itself,
// so pass the wrapper's `window.document`.
let reader = new Readability(doc.window.document);
let article = reader.parse();
```
## What's Readability-readerable?
It's a quick-and-dirty way of figuring out if it's plausible that the contents of a given

@ -28,7 +28,7 @@
"eslint": ">=4.2",
"htmltidy2": "^0.3.0",
"js-beautify": "^1.5.5",
"jsdom": "^7.0",
"jsdom": "^13.1",
"matcha": "^0.6.0",
"mocha": "^2.2.*"
}

@ -2,7 +2,7 @@ var debug = false;
var path = require("path");
var fs = require("fs");
var jsdom = require("jsdom").jsdom;
var JSDOM = require("jsdom").JSDOM;
var prettyPrint = require("./utils").prettyPrint;
var serializeDocument = require("jsdom").serializeDocument;
var http = require("http");
@ -81,7 +81,7 @@ function fetchSource(url, callbackFn) {
}
function sanitizeSource(html, callbackFn) {
htmltidy(serializeDocument(jsdom(html)), {
htmltidy(serializeDocument(new JSDOM(html)), {
"indent": true,
"indent-spaces": 4,
"numeric-entities": true,
@ -128,11 +128,8 @@ function runReadability(source, destPath, metadataDestPath) {
}
// Use jsdom for isProbablyReaderable because it supports querySelectorAll
try {
var jsdomDoc = jsdom(source, {
features: {
FetchExternalResources: false,
ProcessExternalResources: false
}
var jsdomDoc = new JSDOM(source, {
url: uri,
});
myReader = new Readability(jsdomDoc);
readerable = myReader.isProbablyReaderable();

@ -1,4 +1,4 @@
var jsdom = require("jsdom").jsdom;
var JSDOM = require("jsdom").JSDOM;
var chai = require("chai");
chai.config.includeStack = true;
var expect = chai.expect;
@ -10,13 +10,9 @@ describe("isProbablyReaderable - test pages", function() {
testPages.forEach(function(testPage) {
var uri = "http://fakehost/test/page.html";
describe(testPage.dir, function() {
var doc = jsdom(testPage.source, {
var doc = new JSDOM(testPage.source, {
url: uri,
features: {
FetchExternalResources: false,
ProcessExternalResources: false,
},
});
}).window.document;
var expected = testPage.expectedMetadata.readerable;
it("The result should " + (expected ? "" : "not ") + "be readerable", function() {
expect(readabilityCheck.isProbablyReaderable(doc)).eql(expected);

@ -1,4 +1,4 @@
var jsdom = require("jsdom").jsdom;
var JSDOM = require("jsdom").JSDOM;
var chai = require("chai");
chai.config.includeStack = true;
var expect = chai.expect;
@ -219,13 +219,9 @@ describe("Test pages", function() {
var uri = "http://fakehost/test/page.html";
runTestsWithItems("jsdom", function(source) {
var doc = jsdom(source, {
var doc = new JSDOM(source, {
url: uri,
features: {
FetchExternalResources: false,
ProcessExternalResources: false
}
});
}).window.document;
removeCommentNodesRecursively(doc);
return doc;
}, testPage.source, testPage.expectedContent, testPage.expectedMetadata);

Loading…
Cancel
Save