You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.0 KiB
JavaScript
38 lines
1.0 KiB
JavaScript
import cheerio from 'cheerio';
|
|
|
|
// Browsers do not like scripts setting the User-Agent header, so the custom
// header is only used when `cheerio.browser` is falsy.
const MERCURY_HEADERS = {
  'User-Agent': 'Mercury - https://mercury.postlight.com/web-parser/',
};

export const REQUEST_HEADERS = cheerio.browser ? {} : MERCURY_HEADERS;
|
|
|
|
// How long, in milliseconds, a resource fetch may be attempted before it
// times out.
export const FETCH_TIMEOUT = 10 * 1000;
|
|
|
|
// Content types that we do not extract content from
const BAD_CONTENT_TYPES = [
  'audio/mpeg',
  'image/gif',
  'image/jpeg',
  'image/jpg',
];

// Escape regex metacharacters so each entry is matched literally, even if a
// type containing `+` or `.` (e.g. `image/svg+xml`) is added to the list later.
const escapeForRegExp = text => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

// Case-insensitive matcher for the media types listed above.
// A Content-Type value may carry parameters after the media type
// (e.g. `image/jpeg; charset=binary`) and may be padded with whitespace, so
// both are tolerated here; otherwise such values would slip past the list.
// Capture group 1 is the bare media type that matched.
export const BAD_CONTENT_TYPES_RE = new RegExp(
  `^\\s*(${BAD_CONTENT_TYPES.map(escapeForRegExp).join('|')})\\s*(?:;.*)?$`,
  'i'
);
|
|
|
|
// Upper bound on the size of an article we will attempt to parse.
// Defaults to 5 MB (5 * 1024 * 1024).
export const MAX_CONTENT_LENGTH = 5 * 1024 * 1024;
|
|
|
|
// Global on/off switch for proxying. Proxying is not currently enabled in
// the Python source, so no proxy logic is implemented in this port.
export const PROXY_DOMAINS = false;

// The same proxy endpoint is listed for both the http and https entries.
const PROXY_ENDPOINT = 'http://38.98.105.139:33333';

export const REQUESTS_PROXIES = {
  http: PROXY_ENDPOINT,
  https: PROXY_ENDPOINT,
};
|
|
|
|
// Domains to proxy — presumably only consulted when proxying is switched on;
// confirm against callers.
export const DOMAINS_TO_PROXY = ['nih.gov', 'gutenberg.org'];
|