You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/extractors/generic/index.test.js

55 lines
1.3 KiB
JavaScript

import assert from 'assert';
import fs from 'fs';
8 years ago
import GenericExtractor from './index';
8 years ago
// Test suite for GenericExtractor.extract(): each case loads a saved HTML
// fixture from disk, runs the extractor on it, and checks the extracted
// title, author, datePublished and dek fields.
//
// NOTE(review): the fixture paths are relative, so `fs.readFileSync` resolves
// them against the process working directory, not this file's location —
// confirm the test runner is started from the directory these paths expect.
//
// NOTE(review): `assert` is the legacy-mode module here, so `assert.equal`
// compares with `==`; an expected `null` therefore also accepts `undefined`.
describe('GenericExtractor', () => {
  describe('extract(opts)', () => {
    it('extracts this old LA Times article', () => {
      const html = fs.readFileSync('../fixtures/latimes.html', 'utf-8');

      const {
        title,
        author,
        datePublished,
        dek,
      } = GenericExtractor.extract(
        { url: 'http://latimes.com', html, metaCache: [] }
      );

      // This fixture is expected to yield a title and a publish date,
      // but no author and no dek.
      assert.equal(author, null);
      assert.equal(
        title,
        'California appears poised to be first to ban power-guzzling big-screen TVs'
      );
      assert.equal(
        datePublished,
        '2009-10-14T04:00:00.000Z'
      );
      assert.equal(dek, null);
    });

    it('extracts html and returns the article title', () => {
      const html = fs.readFileSync('../fixtures/wired.html', 'utf-8');

      const {
        author,
        title,
        datePublished,
        dek,
      } = GenericExtractor.extract(
        { url: 'http://wired.com', html, metaCache: [] }
      );

      // This fixture is expected to yield an author and a title,
      // but no publish date and no dek.
      assert.equal(author, 'Eric Adams');
      assert.equal(
        title,
        'Airplane Tires Dont Explode on Landing Because They Are Pumped!'
      );
      assert.equal(datePublished, null);
      assert.equal(dek, null);
    });
  });
});
8 years ago