diff --git a/README.md b/README.md index e0f0413..4982a02 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,6 @@ For outstanding issues, see the issue list in this repo, as well as this [bug li To test local changes to Readability.js, you can use the [automated tests](#tests). There's a [node script](https://github.com/mozilla/readability/blob/master/test/generate-testcase.js) to help you create new ones. -Note that because `JSDOMParser` is restricted to parsing XHTML-compatible input, you will likely need to tweak any input you fetch directly from the internet (e.g. to close `<img>` tags). Even if creating a 'readable' version fails, the script will leave the input for you to change. You can then re-run the `generate-testcase.js` script passing only the test page slug, and it will reuse the altered input. Ideally we should fix the `generate-testcase.js` script to no longer need this manual pre/post-processing. If you have time to help with this, a pull request would be very welcome! - Please make sure to run [eslint](http://eslint.org/) against any proposed changes when creating a pull request. 
## Usage diff --git a/package.json b/package.json index 5b8b5fb..27df1b4 100644 --- a/package.json +++ b/package.json @@ -20,12 +20,13 @@ "url": "https://github.com/mozilla/readability/issues" }, "engines": { - "node" : ">=7.0" + "node": ">=7.0" }, "homepage": "https://github.com/mozilla/readability", "devDependencies": { "chai": "^2.1.*", "eslint": ">=4.2", + "htmltidy2": "^0.3.0", "js-beautify": "^1.5.5", "jsdom": "^7.0", "matcha": "^0.6.0", diff --git a/test/generate-testcase.js b/test/generate-testcase.js index 9eaa8ec..6af9d2c 100644 --- a/test/generate-testcase.js +++ b/test/generate-testcase.js @@ -7,6 +7,7 @@ var prettyPrint = require("./utils").prettyPrint; var serializeDocument = require("jsdom").serializeDocument; var http = require("http"); var urlparse = require("url").parse; +var htmltidy = require("htmltidy2").tidy; var readability = require("../index"); var Readability = readability.Readability; @@ -75,13 +76,22 @@ function fetchSource(url, callbackFn) { console.log("End received"); } // Sanitize: - rv = prettyPrint(serializeDocument(jsdom(rv))); - callbackFn(rv); + htmltidy(serializeDocument(jsdom(rv)), { + "indent": true, + "indent-spaces": 4, + "output-xhtml": true, + "wrap": 0 + }, callbackFn); }); }); } -function onResponseReceived(source) { +function onResponseReceived(error, source) { + if (error) { + console.error("Couldn't tidy source html!"); + console.error(error); + return; + } if (debug) { console.log("writing"); }