feat: added text direction to response

pull/3/head
Adam Pash 8 years ago
parent f0f216c7b9
commit d60d396c98

28
dist/iris.js vendored

@ -6,6 +6,7 @@ var babelPolyfill = require('babel-polyfill');
var cheerio = _interopDefault(require('cheerio'));
var URL = _interopDefault(require('url'));
var request = _interopDefault(require('request'));
var stringDirection = _interopDefault(require('string-direction'));
var validUrl = _interopDefault(require('valid-url'));
var moment = _interopDefault(require('moment'));
var wuzzy = _interopDefault(require('wuzzy'));
@ -849,6 +850,8 @@ var WikipediaExtractor = {
content: {
selectors: ['#mw-content-text'],
defaultCleaner: false,
// transform top infobox to an image with caption
transforms: {
'.infobox img': function infoboxImg($node) {
@ -2709,7 +2712,7 @@ var GenericAuthorExtractor = {
}
// Second, look through our selectors looking for potential authors.
author = extractFromSelectors($, AUTHOR_SELECTORS, 2, { contains: true });
author = extractFromSelectors($, AUTHOR_SELECTORS, 2);
if (author && author.length < AUTHOR_MAX_LENGTH) {
return cleanAuthor(author);
}
@ -3698,6 +3701,10 @@ var GenericExtractor = {
url_and_domain: GenericUrlExtractor.extract,
excerpt: GenericExcerptExtractor.extract,
word_count: GenericWordCountExtractor.extract,
direction: function direction(_ref) {
var title = _ref.title;
return stringDirection.getDirection(title);
},
extract: function extract(options) {
var html = options.html;
@ -3717,6 +3724,7 @@ var GenericExtractor = {
var next_page_url = this.next_page_url(options);
var excerpt = this.excerpt(_extends({}, options, { content: content }));
var word_count = this.word_count(_extends({}, options, { content: content }));
var direction = this.direction({ title: title });
var _url_and_domain = this.url_and_domain(options);
@ -3735,7 +3743,8 @@ var GenericExtractor = {
url: url,
domain: domain,
excerpt: excerpt,
word_count: word_count
word_count: word_count,
direction: direction
};
}
};
@ -3807,6 +3816,8 @@ function select(opts) {
if (typeof extractionOpts === 'string') return extractionOpts;
var selectors = extractionOpts.selectors;
var _extractionOpts$defau = extractionOpts.defaultCleaner;
var defaultCleaner = _extractionOpts$defau === undefined ? true : _extractionOpts$defau;
var matchingSelector = selectors.find(function (selector) {
@ -3846,7 +3857,14 @@ function select(opts) {
// otherwise use the text of the node
result = $(matchingSelector).text();
}
return Cleaners[type](result, opts);
// Allow custom extractor to skip default cleaner
// for this type; defaults to true
if (defaultCleaner) {
return Cleaners[type](result, opts);
}
return result;
}
function extractResult(opts) {
@ -3891,6 +3909,7 @@ var RootExtractor = {
var dek = extractResult(_extends({}, opts, { type: 'dek', content: content }));
var excerpt = extractResult(_extends({}, opts, { type: 'excerpt', content: content }));
var word_count = extractResult(_extends({}, opts, { type: 'word_count', content: content }));
var direction = extractResult(_extends({}, opts, { type: 'direction', title: title }));
var _extractResult = extractResult(_extends({}, opts, { type: 'url_and_domain' }));
@ -3909,7 +3928,8 @@ var RootExtractor = {
url: url,
domain: domain,
excerpt: excerpt,
word_count: word_count
word_count: word_count,
direction: direction
};
}
};

2
dist/iris.js.map vendored

File diff suppressed because one or more lines are too long

@ -45,6 +45,7 @@
"moment": "^2.14.1",
"request": "^2.74.0",
"request-promise": "^4.1.1",
"string-direction": "^0.1.2",
"valid-url": "^1.0.9",
"wuzzy": "^0.1.2"
}

@ -1,4 +1,5 @@
import cheerio from 'cheerio';
import stringDirection from 'string-direction';
import GenericContentExtractor from './content/extractor';
import GenericTitleExtractor from './title/extractor';
@ -24,6 +25,7 @@ const GenericExtractor = {
url_and_domain: GenericUrlExtractor.extract,
excerpt: GenericExcerptExtractor.extract,
word_count: GenericWordCountExtractor.extract,
direction: ({ title }) => stringDirection.getDirection(title),
extract(options) {
const { html } = options;
@ -42,6 +44,7 @@ const GenericExtractor = {
const next_page_url = this.next_page_url(options);
const excerpt = this.excerpt({ ...options, content });
const word_count = this.word_count({ ...options, content });
const direction = this.direction({ title });
const { url, domain } = this.url_and_domain(options);
return {
@ -56,6 +59,7 @@ const GenericExtractor = {
domain,
excerpt,
word_count,
direction,
};
},
};

@ -137,6 +137,7 @@ const RootExtractor = {
const dek = extractResult({ ...opts, type: 'dek', content });
const excerpt = extractResult({ ...opts, type: 'excerpt', content });
const word_count = extractResult({ ...opts, type: 'word_count', content });
const direction = extractResult({ ...opts, type: 'direction', title });
const { url, domain } = extractResult({ ...opts, type: 'url_and_domain' });
return {
@ -151,6 +152,7 @@ const RootExtractor = {
domain,
excerpt,
word_count,
direction,
};
},
};

@ -22,6 +22,7 @@ describe('RootExtractor', () => {
url,
title,
word_count,
direction,
} = RootExtractor.extract(
NYMagExtractor, { url: fullUrl, html, $, metaCache: [] }
);
@ -29,6 +30,7 @@ describe('RootExtractor', () => {
assert.equal(title, 'Trump Claims He Discussed $25K Donation With Florida Attorney General, But Not Trump University Investigation');
assert.equal(url, fullUrl);
assert.equal(word_count, 727);
assert.equal(direction, 'ltr');
});
});

Loading…
Cancel
Save