diff --git a/.travis.yml b/.travis.yml index 12e92a8..bb5286b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,7 @@ language: node_js sudo: false node_js: - - '6.5' + - '7.0' script: - npm run lint - npm run test diff --git a/JSDOMParser.js b/JSDOMParser.js index 18328f6..7b86601 100644 --- a/JSDOMParser.js +++ b/JSDOMParser.js @@ -553,7 +553,8 @@ }, }; - var Document = function () { + var Document = function (url) { + this.documentURI = url; this.styleSheets = []; this.childNodes = []; this.children = []; @@ -593,6 +594,20 @@ node.textContent = text; return node; }, + + get baseURI() { + if (!this.hasOwnProperty("_baseURI")) { + this._baseURI = this.documentURI; + var baseElements = this.getElementsByTagName("base"); + var href = baseElements[0] && baseElements[0].getAttribute("href"); + if (href) { + try { + this._baseURI = (new URL(href, this._baseURI)).href; + } catch (ex) {/* Just fall back to documentURI */} + } + } + return this._baseURI; + }, }; var Element = function (tag) { @@ -1111,9 +1126,9 @@ /** * Parses an HTML string and returns a JS implementation of the Document. */ - parse: function (html) { + parse: function (html, url) { this.html = html; - var doc = this.doc = new Document(); + var doc = this.doc = new Document(url); this.readChildren(doc); // If this is an HTML document, remove root-level children except for the diff --git a/index.js b/index.js index 214c4f2..8af525e 100644 --- a/index.js +++ b/index.js @@ -1,5 +1,6 @@ var path = require("path"); var fs = require("fs"); +var url = require("url"); // We want to load Readability and JSDOMParser, which aren't set up as commonjs libraries, // and so we need to do some hocus-pocus with 'vm' to import them on a separate scope @@ -14,6 +15,7 @@ var scopeContext = {}; // in the scope we're using: scopeContext.dump = console.log; scopeContext.console = console; +scopeContext.URL = url.URL; // Actually load files. NB: if either of the files has parse errors, // node is dumb and shows you a syntax error *at this callsite* . Don't try to find diff --git a/package.json b/package.json index f03bdad..5b8b5fb 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "readability", - "version": "0.0.1", + "version": "0.1.0", "description": "A standalone version of the readability library used for Firefox Reader View.", "main": "Readability.js", "scripts": { @@ -20,7 +20,7 @@ "url": "https://github.com/mozilla/readability/issues" }, "engines": { - "node" : ">=6.5" + "node" : ">=7.0" }, "homepage": "https://github.com/mozilla/readability", "devDependencies": { diff --git a/test/test-jsdomparser.js b/test/test-jsdomparser.js index 7facd49..38ee52f 100644 --- a/test/test-jsdomparser.js +++ b/test/test-jsdomparser.js @@ -9,7 +9,7 @@ var BASETESTCASE = '

Some text and a '

With a that is fun.And another node to make it harder
Here\'s a form'; -var baseDoc = new JSDOMParser().parse(BASETESTCASE); +var baseDoc = new JSDOMParser().parse(BASETESTCASE, "http://fakehost/"); describe("Test JSDOM functionality", function() { function nodeExpect(actual, expected) { @@ -37,6 +37,11 @@ describe("Test JSDOM functionality", function() { }); + it("should have basic URI information", function() { + expect(baseDoc.documentURI, "http://fakehost/"); + expect(baseDoc.baseURI, "http://fakehost/"); + }); + it("should deal with script tags", function() { // Check our script parsing worked: var scripts = baseDoc.getElementsByTagName("script"); @@ -300,3 +305,18 @@ describe("Recovery from self-closing tags that have close tags", function() { expect(doc.firstChild.firstChild.firstChild.localName).eql("p"); }); }); + +describe("baseURI parsing", function() { + it("should handle various types of relative and absolute base URIs", function() { + function checkBase(base, expectedResult) { + var html = ""; + var doc = new JSDOMParser().parse(html, "http://fakehost/some/dir/"); + expect(doc.baseURI).eql(expectedResult); + } + + checkBase("relative/path", "http://fakehost/some/dir/relative/path"); + checkBase("/path", "http://fakehost/path"); + checkBase("http://absolute/", "http://absolute/"); + checkBase("//absolute/path", "http://absolute/path"); + }); +}); diff --git a/test/test-readability.js b/test/test-readability.js index 14b0276..bc34095 100644 --- a/test/test-readability.js +++ b/test/test-readability.js @@ -229,6 +229,7 @@ describe("Test pages", function() { runTestsWithItems("jsdom", function(source) { var doc = jsdom(source, { + url: uri.spec, features: { FetchExternalResources: false, ProcessExternalResources: false @@ -240,7 +241,7 @@ describe("Test pages", function() { runTestsWithItems("JSDOMParser", function(source) { var parser = new JSDOMParser(); - var doc = parser.parse(source); + var doc = parser.parse(source, uri.spec); if (parser.errorState) { console.error("Parsing this DOM caused errors:", parser.errorState); return null;