You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
mercury-parser/src/extractors/custom/www.reddit.com/index.test.js

227 lines
6.9 KiB
JavaScript

import assert from 'assert';
import URL from 'url';
import cheerio from 'cheerio';
import dayjs from 'dayjs';
import Mercury from 'mercury';
import getExtractor from 'extractors/get-extractor';
import { excerptContent } from 'utils/text';
const fs = require('fs');
describe('WwwRedditComExtractor', () => {
describe('initial test case', () => {
// Shared by the tests below: `url` is the address of the text-post page and
// `result` holds whatever Mercury.parse returns for it (the tests await it).
let result;
let url;
beforeAll(() => {
  url =
    'https://www.reddit.com/r/Showerthoughts/comments/awx46q/vanilla_becoming_the_default_flavour_of_ice_cream/';
  const fixtureHtml = fs.readFileSync('./fixtures/www.reddit.com.html');
  // The parse is started here without awaiting it; fallback is disabled.
  const parseOptions = { html: fixtureHtml, fallback: false };
  result = Mercury.parse(url, parseOptions);
});
it('is selected properly', () => {
  // Sanity check that is expected to pass out of the box: the extractor
  // chosen for this URL must report the URL's own hostname as its domain.
  const { domain } = getExtractor(url);
  const { hostname } = URL.parse(url);
  assert.equal(domain, hostname);
});
it('returns the title', async () => {
  // Passing requires the title selector in
  // ./src/extractors/custom/www.reddit.com/index.js to be filled out.
  const parsed = await result;
  // Expected value taken from the article; update it if the fixture changes.
  const expectedTitle =
    'Vanilla becoming the default flavour of ice cream is the greatest underdog story of all time.';
  assert.equal(parsed.title, expectedTitle);
});
it('returns the author', async () => {
  // Passing requires the author selector in
  // ./src/extractors/custom/www.reddit.com/index.js to be filled out.
  const parsed = await result;
  // Expected value taken from the article; update it if the fixture changes.
  const expectedAuthor = 'u/benyacobi';
  assert.equal(parsed.author, expectedAuthor);
});
it('returns the date_published', async () => {
  // To pass this test, fill out the date_published selector
  // in ./src/extractors/custom/www.reddit.com/index.js.
  const { date_published } = await result;
  // Render BOTH sides through dayjs so they are calendar days in the same
  // (local) time zone. Slicing the raw date_published string keeps whatever
  // zone that string was serialised in, while dayjs().format() is local time,
  // so the two days could disagree whenever the zones straddle midnight.
  const DAY_FORMAT = 'YYYY-MM-DD';
  const actualDay = dayjs(date_published).format(DAY_FORMAT);
  // The expectation is relative to "now": four years before the test run.
  // NOTE(review): presumably the fixture carries a relative "4 years ago"
  // timestamp that the extractor resolves at parse time — confirm.
  const expectedDay = dayjs()
    .subtract(4, 'years')
    .format(DAY_FORMAT);
  assert.equal(actualDay, expectedDay);
});
it('returns the lead_image_url', async () => {
  // Passing requires the lead_image_url selector in
  // ./src/extractors/custom/www.reddit.com/index.js to be filled out.
  // This test parses the image-post fixture rather than the shared result.
  const html = fs.readFileSync('./fixtures/www.reddit.com--image.html');
  const uri =
    'https://www.reddit.com/r/aww/comments/aybw1m/nothing_to_see_here_human/';
  const parsed = await Mercury.parse(uri, { html });
  // Expected value taken from the article; update it if the fixture changes.
  const expectedImageUrl =
    'https://preview.redd.it/jsc4t74psok21.jpg?auto=webp&s=9c9826487e34d399333f65beb64390206fff4125';
  assert.equal(parsed.lead_image_url, expectedImageUrl);
});
it('returns the content for text posts', async () => {
  // Passing requires the content selector in
  // ./src/extractors/custom/www.reddit.com/index.js to be filled out;
  // the clean and transform options may also be useful there.
  const parsed = await result;
  // An empty string stands in for missing content so cheerio still loads.
  const $ = cheerio.load(parsed.content || '');
  const firstElementText = $('*')
    .first()
    .text();
  // The expected excerpt below is 13 words long, matching the 13 passed here.
  const first13 = excerptContent(firstElementText, 13);
  // Expected value taken from the article; update it if the fixture changes.
  const expectedExcerpt =
    'Edit: thank you for educating me about the ubiquity of vanilla. Still, none';
  assert.equal(first13, expectedExcerpt);
});
it('handles posts that only have a title', async () => {
  // Parses the title-only fixture and expects the excerpt of the extracted
  // content to be the empty string.
  const html = fs.readFileSync('./fixtures/www.reddit.com--title-only.html');
  const uri =
    'https://www.reddit.com/r/AskReddit/comments/axtih6/what_is_the_most_worth_it_item_you_have_ever/';
  const parsed = await Mercury.parse(uri, { html });
  // An empty string stands in for missing content so cheerio still loads.
  const $ = cheerio.load(parsed.content || '');
  const firstElementText = $('*')
    .first()
    .text();
  const first13 = excerptContent(firstElementText, 13);
  assert.equal(first13, '');
});
it('handles image posts', async () => {
  // Parses the image-post fixture and expects the extracted content to hold
  // exactly one <img> with the preview URL below.
  const html = fs.readFileSync('./fixtures/www.reddit.com--image.html');
  const uri =
    'https://www.reddit.com/r/aww/comments/aybw1m/nothing_to_see_here_human/';
  const parsed = await Mercury.parse(uri, { html });
  const $ = cheerio.load(parsed.content || '');
  const imageSelector =
    'img[src="https://preview.redd.it/jsc4t74psok21.jpg?width=960&crop=smart&auto=webp&s=54349b21ff628e8c22c053509e86ba84ff9751d3"]';
  const matchedImages = $(imageSelector);
  assert.equal(matchedImages.length, 1);
});
it('handles video posts', async () => {
  // Parses the video-post fixture and expects exactly one <video> element
  // in the extracted content.
  const html = fs.readFileSync('./fixtures/www.reddit.com--video.html');
  const uri =
    'https://www.reddit.com/r/HumansBeingBros/comments/aybtf7/thanks_human/';
  const parsed = await Mercury.parse(uri, { html });
  const $ = cheerio.load(parsed.content || '');
  const videoCount = $('video').length;
  assert.equal(videoCount, 1);
});
it('handles external link posts with image preview', async () => {
  // Parses the external-link fixture and expects the outbound link to appear
  // twice and the thumbnail image once in the extracted content.
  const html = fs.readFileSync('./fixtures/www.reddit.com--external-link.html');
  const uri =
    'https://www.reddit.com/r/todayilearned/comments/aycizd/til_that_when_jrr_tolkiens_son_michael_signed_up/';
  const parsed = await Mercury.parse(uri, { html });
  const $ = cheerio.load(parsed.content || '');
  const linkSelector =
    'a[href="https://www.1843magazine.com/culture/look-closer/tolkiens-drawings-reveal-a-wizard-at-work"]';
  const imageSelector =
    'img[src="https://b.thumbs.redditmedia.com/mZhJhz9fAxHzdGRcA-tbCa06IieIIuI0YWfdSYQa3Uk.jpg"]';
  const matchedLinks = $(linkSelector);
  const matchedImages = $(imageSelector);
  assert.equal(matchedLinks.length, 2);
  assert.equal(matchedImages.length, 1);
});
it('handles external image posts with image preview', async () => {
  // Parses the external-image fixture and expects one link to the imgur
  // .gifv URL plus one <video> element in the extracted content.
  const html = fs.readFileSync(
    './fixtures/www.reddit.com--external-image.html'
  );
  const uri =
    'https://www.reddit.com/r/gifs/comments/4vv0sa/leonardo_dicaprio_scaring_jonah_hill_on_the/';
  const parsed = await Mercury.parse(uri, { html });
  const $ = cheerio.load(parsed.content || '');
  const matchedLinks = $('a[href="http://i.imgur.com/Qcx1DSD.gifv"]');
  const matchedVideos = $('video');
  assert.equal(matchedLinks.length, 1);
  assert.equal(matchedVideos.length, 1);
});
it('handles external link posts with embedded media', async () => {
  // Parses the embedded-media fixture and expects one link to the YouTube
  // URL plus one iframe whose src starts with the redditmedia embed prefix.
  const html = fs.readFileSync('./fixtures/www.reddit.com--embedded.html');
  const uri =
    'https://www.reddit.com/r/videos/comments/5gafop/rick_astley_never_gonna_give_you_up_sped_up_every/';
  const parsed = await Mercury.parse(uri, { html });
  const $ = cheerio.load(parsed.content || '');
  const embedSelector =
    'iframe[src^="https://www.redditmedia.com/mediaembed/5gafop"]';
  const matchedLinks = $('a[href="https://youtu.be/dQw4w9WgXcQ"]');
  const matchedEmbeds = $(embedSelector);
  assert.equal(matchedLinks.length, 1);
  assert.equal(matchedEmbeds.length, 1);
});
});
});