You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/src/resource/utils/constants.js

38 lines
1.0 KiB
JavaScript

import cheerio from 'cheerio';
// Headers sent with outgoing requests. When `cheerio.browser` is truthy
// we send no custom headers, because the browser does not like us setting
// the user agent; otherwise we identify ourselves as Mercury.
const MERCURY_HEADERS = {
  'User-Agent': 'Mercury - https://mercury.postlight.com/web-parser/',
};
export const REQUEST_HEADERS = cheerio.browser ? {} : MERCURY_HEADERS;
// How long, in milliseconds, we keep trying to fetch a resource before
// giving up with a timeout (10 seconds).
export const FETCH_TIMEOUT = 10 * 1000;
// Content types that we do not extract content from.
const BAD_CONTENT_TYPES = [
  'audio/mpeg',
  'image/gif',
  'image/jpeg',
  'image/jpg',
];

// Case-insensitive matcher for a Content-Type header value that names one
// of BAD_CONTENT_TYPES. Capture group 1 holds the matched media type.
//
// A Content-Type value may carry parameters after the media type
// (e.g. "image/jpeg; charset=binary") and may be padded with whitespace,
// so both are tolerated here; anchoring on the bare media type alone
// would let such responses slip past the filter. The media type itself
// must still match in full ("image/jpeg2000" is not rejected).
export const BAD_CONTENT_TYPES_RE = new RegExp(
  `^\\s*(${BAD_CONTENT_TYPES.join('|')})\\s*(?:;.*)?$`,
  'i'
);
// Largest article size, in bytes, that we will attempt to parse.
// Defaults to 5 MB (5 * 1024 * 1024 = 5242880 bytes).
export const MAX_CONTENT_LENGTH = 5 * 1024 * 1024;
// Global proxy switch. It is off: proxying is not currently enabled in
// the Python source, so the proxy logic is not implemented in this port.
export const PROXY_DOMAINS = false;

// Proxy endpoint, used for both the http and https entries below.
const PROXY_ENDPOINT = 'http://38.98.105.139:33333';

// Proxy URL keyed by request scheme.
export const REQUESTS_PROXIES = {
  http: PROXY_ENDPOINT,
  https: PROXY_ENDPOINT,
};

// Domains listed for proxying.
export const DOMAINS_TO_PROXY = ['nih.gov', 'gutenberg.org'];