You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
115 lines
3.2 KiB
JavaScript
115 lines
3.2 KiB
JavaScript
import URL from 'url';
|
|
import request from 'request';
|
|
import { Errors } from 'utils';
|
|
|
|
import {
|
|
REQUEST_HEADERS,
|
|
FETCH_TIMEOUT,
|
|
BAD_CONTENT_TYPES_RE,
|
|
MAX_CONTENT_LENGTH,
|
|
} from './constants';
|
|
|
|
/**
 * Promise adapter around the callback-style `request` client.
 *
 * @param {Object} options - Options object handed to `request` unchanged.
 * @returns {Promise<{body: *, response: Object}>} Resolves with the response
 *   and body passed to the `request` callback; rejects with the callback's
 *   error when one is reported.
 */
function get(options) {
  return new Promise((resolve, reject) => {
    request(options, (err, response, body) => {
      if (err) {
        reject(err);
        return;
      }

      resolve({ body, response });
    });
  });
}
|
|
|
|
// Evaluate a response to ensure it's something we should be keeping.
// This does not validate in the sense of checking whether the response is
// 200-level or not. Validation here means that we haven't found a reason
// to bail from further processing of this url.
|
|
|
|
/**
 * Decide whether a fetched response should be processed further.
 *
 * Checks, in order: the status of the response, its content-type against
 * BAD_CONTENT_TYPES_RE, and its content-length against MAX_CONTENT_LENGTH.
 *
 * @param {Object} response - Response object; `statusMessage`, `statusCode`,
 *   `error` and `headers` are read.
 * @param {boolean} [parseNon2xx=false] - When true, a response whose status
 *   is not 200/OK is still accepted as long as it has a status code.
 * @returns {boolean} Always `true` when no check fails.
 * @throws {Error} When the response has no status code, when the status is
 *   not 200/OK and `parseNon2xx` is false, when the content-type matches
 *   BAD_CONTENT_TYPES_RE, or when content-length exceeds MAX_CONTENT_LENGTH.
 */
export function validateResponse(response, parseNon2xx = false) {
  // statusMessage is only consulted when it is set: nock does not set it in
  // tests, in which case only statusCode is checked (currently only 200 for
  // OK responses in tests).
  const hasBadStatusMessage =
    Boolean(response.statusMessage) && response.statusMessage !== 'OK';

  if (hasBadStatusMessage || response.statusCode !== 200) {
    // No status code at all means the fetch itself did not produce a
    // usable response; this is rejected regardless of parseNon2xx.
    if (!response.statusCode) {
      throw new Error(
        `Unable to fetch content. Original exception was ${response.error}`
      );
    }

    if (!parseNon2xx) {
      throw new Error(
        `Resource returned a response status code of ${response.statusCode} and resource was instructed to reject non-2xx level status codes.`
      );
    }
  }

  const {
    'content-type': contentType,
    'content-length': contentLength,
  } = response.headers;

  // Check that the content is not in BAD_CONTENT_TYPES
  if (BAD_CONTENT_TYPES_RE.test(contentType)) {
    throw new Error(
      `Content-type for this resource was ${contentType} and is not allowed.`
    );
  }

  // Check that the content length is below maximum. A missing header
  // compares as false here, so responses without content-length pass.
  if (contentLength > MAX_CONTENT_LENGTH) {
    throw new Error(
      `Content for this resource was too large. Maximum content length is ${MAX_CONTENT_LENGTH}.`
    );
  }

  return true;
}
|
|
|
|
// Grabs the last two dot-separated pieces of the host and joins them back
// together. This is to get the 'livejournal.com' from
// 'erotictrains.livejournal.com'
|
|
/**
 * Reduce a host name to its last two dot-separated labels,
 * e.g. 'blog.example.com' -> 'example.com'.
 *
 * Hosts with fewer than two labels are returned unchanged. Because only the
 * label count is considered, a host such as 'news.example.co.uk' yields
 * 'co.uk'.
 *
 * @param {{host: string}} parsedUrl - Object whose `host` property is read.
 * @returns {string} The last two labels of `host`, joined with '.'.
 */
export function baseDomain({ host }) {
  const labels = host.split('.');
  return labels.slice(-2).join('.');
}
|
|
|
|
// Fetch our URL and return the resulting response and body.
// TODO: This should gracefully handle timeouts and raise the
//       proper exceptions on the many failure cases of HTTP.
// TODO: Ensure we are not fetching something enormous. Always return
//       unicode content for HTML, with charset conversion.
|
|
|
|
/**
 * Fetch a URL and return its body and response if the response passes
 * `validateResponse`.
 *
 * @param {string} url - Address to fetch. Only used when `parsedUrl` is
 *   not supplied, in which case it is run through `encodeURI` and parsed.
 * @param {Object} [parsedUrl] - Already-parsed URL; only its `href` is read.
 * @returns {Promise<Object>} `{ body, response }` when validation passes, or
 *   `Errors.badUrl` when `validateResponse` throws. The promise rejects if
 *   the request itself fails, because the fetch happens outside the
 *   try/catch.
 */
export default async function fetchResource(url, parsedUrl) {
  // Keep the caller's argument untouched; fall back to parsing `url`.
  const target = parsedUrl || URL.parse(encodeURI(url));

  const options = {
    url: target.href,
    // Copy so the shared REQUEST_HEADERS object is never handed out directly
    headers: { ...REQUEST_HEADERS },
    timeout: FETCH_TIMEOUT,
    // Don't set encoding; fixes issues
    // w/gzipped responses
    encoding: null,
    // Accept cookies
    jar: true,
    // Accept and decode gzip
    gzip: true,
    // Follow any redirect
    followAllRedirects: true,
  };

  const { response, body } = await get(options);

  try {
    validateResponse(response);
  } catch (e) {
    // Any validation failure is reported to the caller as a bad URL.
    return Errors.badUrl;
  }

  return {
    body,
    response,
  };
}
|