mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-17 03:25:31 +00:00
feat: Return specific errors on failed parse attempts
This commit is contained in:
parent
a250f403f5
commit
136d6df798
@ -3,7 +3,7 @@ import cheerio from 'cheerio';
|
|||||||
import TurndownService from 'turndown';
|
import TurndownService from 'turndown';
|
||||||
|
|
||||||
import Resource from 'resource';
|
import Resource from 'resource';
|
||||||
import { validateUrl, Errors } from 'utils';
|
import { validateUrl } from 'utils';
|
||||||
import getExtractor from 'extractors/get-extractor';
|
import getExtractor from 'extractors/get-extractor';
|
||||||
import RootExtractor from 'extractors/root-extractor';
|
import RootExtractor from 'extractors/root-extractor';
|
||||||
import collectAllPages from 'extractors/collect-all-pages';
|
import collectAllPages from 'extractors/collect-all-pages';
|
||||||
@ -27,7 +27,11 @@ const Mercury = {
|
|||||||
const parsedUrl = URL.parse(url);
|
const parsedUrl = URL.parse(url);
|
||||||
|
|
||||||
if (!validateUrl(parsedUrl)) {
|
if (!validateUrl(parsedUrl)) {
|
||||||
return Errors.badUrl;
|
return {
|
||||||
|
error: true,
|
||||||
|
message:
|
||||||
|
'The url parameter passed does not look like a valid URL. Please check your URL and try again.',
|
||||||
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const $ = await Resource.create(url, html, parsedUrl);
|
const $ = await Resource.create(url, html, parsedUrl);
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
import assert from 'assert';
|
import assert from 'assert';
|
||||||
import { Errors } from 'utils';
|
|
||||||
|
|
||||||
import { record } from 'test-helpers';
|
import { record } from 'test-helpers';
|
||||||
import Mercury from './mercury';
|
import Mercury from './mercury';
|
||||||
@ -15,13 +14,13 @@ describe('Mercury', () => {
|
|||||||
it('returns an error if a malformed url is passed', async () => {
|
it('returns an error if a malformed url is passed', async () => {
|
||||||
const error = await Mercury.parse('foo.com');
|
const error = await Mercury.parse('foo.com');
|
||||||
|
|
||||||
assert.equal(error, Errors.badUrl);
|
assert(/does not look like a valid URL/i.test(error.message));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('returns an error if a bad url is passed', async () => {
|
it('returns an error if a bad url is passed', async () => {
|
||||||
const error = await Mercury.parse('foo.com');
|
const error = await Mercury.parse('foo.com');
|
||||||
|
|
||||||
assert.equal(error, Errors.badUrl);
|
assert(/does not look like a valid URL/i.test(error.message));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does the whole thing', async () => {
|
it('does the whole thing', async () => {
|
||||||
@ -38,15 +37,15 @@ describe('Mercury', () => {
|
|||||||
'https://www.thekitchn.com/instant-pot-chicken-pesto-pasta-eating-instantly-267141'
|
'https://www.thekitchn.com/instant-pot-chicken-pesto-pasta-eating-instantly-267141'
|
||||||
);
|
);
|
||||||
|
|
||||||
assert.equal(error, Errors.badUrl);
|
assert(/instructed to reject non-2xx/i.test(error.message));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does blogger', async () => {
|
it('returns an error on invalid content types', async () => {
|
||||||
const result = await Mercury.parse(
|
const error = await Mercury.parse(
|
||||||
'https://googleblog.blogspot.com/2016/08/onhub-turns-one-today.html'
|
'https://upload.wikimedia.org/wikipedia/commons/5/52/Spacer.gif'
|
||||||
);
|
);
|
||||||
|
|
||||||
assert.equal(typeof result, 'object');
|
assert(/content-type for this resource/i.test(error.message));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('does blogger', async () => {
|
it('does blogger', async () => {
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import assert from 'assert';
|
import assert from 'assert';
|
||||||
import cheerio from 'cheerio';
|
import cheerio from 'cheerio';
|
||||||
import { Errors } from 'utils';
|
|
||||||
import { getEncoding } from 'utils/text';
|
import { getEncoding } from 'utils/text';
|
||||||
|
|
||||||
import { record } from 'test-helpers';
|
import { record } from 'test-helpers';
|
||||||
@ -23,7 +22,7 @@ describe('Resource', () => {
|
|||||||
const url = 'http://nytimes.com/500';
|
const url = 'http://nytimes.com/500';
|
||||||
const error = await Resource.create(url);
|
const error = await Resource.create(url);
|
||||||
|
|
||||||
assert.equal(error, Errors.badUrl);
|
assert(/instructed to reject non-2xx/i.test(error.message));
|
||||||
});
|
});
|
||||||
|
|
||||||
it('fetches with different encoding on body', async () => {
|
it('fetches with different encoding on body', async () => {
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
import URL from 'url';
|
import URL from 'url';
|
||||||
import request from 'postman-request';
|
import request from 'postman-request';
|
||||||
import { Errors } from 'utils';
|
|
||||||
|
|
||||||
import {
|
import {
|
||||||
REQUEST_HEADERS,
|
REQUEST_HEADERS,
|
||||||
@ -119,6 +118,9 @@ export default async function fetchResource(url, parsedUrl) {
|
|||||||
response,
|
response,
|
||||||
};
|
};
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
return Errors.badUrl;
|
return {
|
||||||
|
error: true,
|
||||||
|
message: e.message,
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,9 +0,0 @@
|
|||||||
const Errors = {
|
|
||||||
badUrl: {
|
|
||||||
error: true,
|
|
||||||
messages:
|
|
||||||
'The url parameter passed does not look like a valid URL. Please check your data and try again.',
|
|
||||||
},
|
|
||||||
};
|
|
||||||
|
|
||||||
export default Errors;
|
|
@ -1,3 +1,2 @@
|
|||||||
export { default as range } from './range';
|
export { default as range } from './range';
|
||||||
export { default as validateUrl } from './validate-url';
|
export { default as validateUrl } from './validate-url';
|
||||||
export { default as Errors } from './errors';
|
|
||||||
|
Loading…
Reference in New Issue
Block a user