From 21f444367fea49a7b176dec8e0c7550621f1eb1d Mon Sep 17 00:00:00 2001 From: Adam Pash Date: Wed, 14 Sep 2016 12:21:32 -0400 Subject: [PATCH] feat: added page counts --- src/extractors/collect-all-pages.js | 12 +++++++++--- src/iris.js | 7 +++++++ src/iris.test.js | 6 ++++++ 3 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/extractors/collect-all-pages.js b/src/extractors/collect-all-pages.js index d297b3ee..61efc0b8 100644 --- a/src/extractors/collect-all-pages.js +++ b/src/extractors/collect-all-pages.js @@ -17,11 +17,14 @@ export default async function collectAllPages( url, } ) { - let pages = 2; + // At this point, we've fetched just the first page + let pages = 1; const previousUrls = [removeAnchor(url)]; + // If we've gone over 26 pages, something has // likely gone wrong. while (nextPageUrl && pages < 26) { + pages += 1; $ = await Resource.create(nextPageUrl); html = $.html(); @@ -50,8 +53,11 @@ export default async function collectAllPages( nextPageUrl = nextPageResult.nextPageUrl; - pages += 1; } - return result; + return { + ...result, + totalPages: pages, + pagesRendered: pages, + }; } diff --git a/src/iris.js b/src/iris.js index d0be2a78..555a8e98 100644 --- a/src/iris.js +++ b/src/iris.js @@ -19,6 +19,7 @@ const Iris = { let result = RootExtractor.extract(Extractor, { url, html, $, metaCache }); const { title, nextPageUrl } = result; + // Fetch more pages if nextPageUrl found if (fetchAllPages && nextPageUrl) { result = await collectAllPages( { @@ -33,6 +34,12 @@ const Iris = { url, } ); + } else { + result = { + ...result, + totalPages: 1, + pagesRendered: 1, + }; } return result; diff --git a/src/iris.test.js b/src/iris.test.js index db4fce6a..88749bec 100644 --- a/src/iris.test.js +++ b/src/iris.test.js @@ -29,6 +29,7 @@ describe('Iris', () => { const result = await Iris.parse('http://www.nytimes.com/2016/08/16/upshot/the-state-of-the-clinton-trump-race-is-it-over.html?_r=0'); assert.equal(typeof result, 'object'); + 
assert.equal(result.totalPages, 1); // console.log(result) }); @@ -40,6 +41,11 @@ describe('Iris', () => { { fetchAllPages: true } ); + const { totalPages, pagesRendered } = result; + + assert.equal(totalPages, 3); + assert.equal(pagesRendered, 3); + // console.log(result) assert.equal(result.nextPageUrl, `${url}2`); // console.log(result.content)