feat: added page counts

pull/3/head
Adam Pash 8 years ago
parent f3a5d0ecca
commit 21f444367f

@ -17,11 +17,14 @@ export default async function collectAllPages(
url,
}
) {
let pages = 2;
// At this point, we've fetched just the first page
let pages = 1;
const previousUrls = [removeAnchor(url)];
// If we've gone over 26 pages, something has
// likely gone wrong.
while (nextPageUrl && pages < 26) {
pages += 1;
$ = await Resource.create(nextPageUrl);
html = $.html();
@ -50,8 +53,11 @@ export default async function collectAllPages(
nextPageUrl = nextPageResult.nextPageUrl;
pages += 1;
}
return result;
return {
...result,
totalPages: pages,
pagesRendered: pages,
};
}

@ -19,6 +19,7 @@ const Iris = {
let result = RootExtractor.extract(Extractor, { url, html, $, metaCache });
const { title, nextPageUrl } = result;
// Fetch more pages if nextPageUrl found
if (fetchAllPages && nextPageUrl) {
result = await collectAllPages(
{
@ -33,6 +34,12 @@ const Iris = {
url,
}
);
} else {
// Single-page case: nothing beyond the first page was fetched, so both
// counts are 1. The key names match what collectAllPages returns
// (totalPages / pagesRendered) so the result has one shape regardless of
// which branch produced it.
result = {
  ...result,
  totalPages: 1,
  pagesRendered: 1,
};
}
return result;

@ -29,6 +29,7 @@ describe('Iris', () => {
const result = await Iris.parse('http://www.nytimes.com/2016/08/16/upshot/the-state-of-the-clinton-trump-race-is-it-over.html?_r=0');
assert.equal(typeof result, 'object');
assert.equal(result.totalPages, 1);
// console.log(result)
});
@ -40,6 +41,11 @@ describe('Iris', () => {
{ fetchAllPages: true }
);
const { totalPages, pagesRendered } = result
assert.equal(totalPages, 3)
assert.equal(pagesRendered, 3)
// console.log(result)
assert.equal(result.nextPageUrl, `${url}2`);
// console.log(result.content)

Loading…
Cancel
Save