mirror of
https://github.com/postlight/mercury-parser
synced 2024-10-31 03:20:40 +00:00
fix: custom parser generator
- swap fs import - fix rollup config
This commit is contained in:
parent
0e27448866
commit
a60d5a6332
7614
dist/generate-custom-parser.js
vendored
7614
dist/generate-custom-parser.js
vendored
File diff suppressed because it is too large
Load Diff
26
dist/mercury.js
vendored
26
dist/mercury.js
vendored
@ -4,6 +4,7 @@ function _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'defau
|
||||
|
||||
var _regeneratorRuntime = _interopDefault(require('@babel/runtime-corejs2/regenerator'));
|
||||
var _objectSpread = _interopDefault(require('@babel/runtime-corejs2/helpers/objectSpread'));
|
||||
var _objectWithoutProperties = _interopDefault(require('@babel/runtime-corejs2/helpers/objectWithoutProperties'));
|
||||
var _asyncToGenerator = _interopDefault(require('@babel/runtime-corejs2/helpers/asyncToGenerator'));
|
||||
var URL = _interopDefault(require('url'));
|
||||
var cheerio = _interopDefault(require('cheerio'));
|
||||
@ -178,13 +179,16 @@ function excerptContent(content) {
|
||||
|
||||
function getEncoding(str) {
|
||||
var encoding = DEFAULT_ENCODING;
|
||||
var matches = ENCODING_RE.exec(str);
|
||||
|
||||
if (ENCODING_RE.test(str)) {
|
||||
var testEncode = ENCODING_RE.exec(str)[1];
|
||||
if (matches !== null) {
|
||||
var _matches = _slicedToArray(matches, 2);
|
||||
|
||||
if (iconv.encodingExists(testEncode)) {
|
||||
encoding = testEncode;
|
||||
str = _matches[1];
|
||||
}
|
||||
|
||||
if (iconv.encodingExists(str)) {
|
||||
encoding = str;
|
||||
}
|
||||
|
||||
return encoding;
|
||||
@ -1701,10 +1705,10 @@ var Resource = {
|
||||
var decodedContent = iconv.decode(content, encoding);
|
||||
var $ = cheerio.load(decodedContent); // after first cheerio.load, check to see if encoding matches
|
||||
|
||||
var metaContentType = $('meta[http-equiv=content-type]').attr('content');
|
||||
var metaContentType = $('meta[http-equiv=content-type i]').attr('content') || $('meta[charset]').attr('charset');
|
||||
var properEncoding = getEncoding(metaContentType); // if encodings in the header/body dont match, use the one in the body
|
||||
|
||||
if (properEncoding !== encoding) {
|
||||
if (metaContentType && properEncoding !== encoding) {
|
||||
decodedContent = iconv.decode(content, properEncoding);
|
||||
$ = cheerio.load(decodedContent);
|
||||
}
|
||||
@ -6429,8 +6433,10 @@ var Mercury = {
|
||||
parse: function () {
|
||||
var _parse = _asyncToGenerator(
|
||||
/*#__PURE__*/
|
||||
_regeneratorRuntime.mark(function _callee(url, html) {
|
||||
var opts,
|
||||
_regeneratorRuntime.mark(function _callee(url) {
|
||||
var _ref,
|
||||
html,
|
||||
opts,
|
||||
_opts$fetchAllPages,
|
||||
fetchAllPages,
|
||||
_opts$fallback,
|
||||
@ -6451,7 +6457,7 @@ var Mercury = {
|
||||
while (1) {
|
||||
switch (_context.prev = _context.next) {
|
||||
case 0:
|
||||
opts = _args.length > 2 && _args[2] !== undefined ? _args[2] : {};
|
||||
_ref = _args.length > 1 && _args[1] !== undefined ? _args[1] : {}, html = _ref.html, opts = _objectWithoutProperties(_ref, ["html"]);
|
||||
_opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === void 0 ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === void 0 ? true : _opts$fallback, _opts$contentType = opts.contentType, contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType; // if no url was passed and this is the browser version,
|
||||
// set url to window.location.href and load the html
|
||||
// from the current page
|
||||
@ -6549,7 +6555,7 @@ var Mercury = {
|
||||
}, _callee, this);
|
||||
}));
|
||||
|
||||
function parse(_x, _x2) {
|
||||
function parse(_x) {
|
||||
return _parse.apply(this, arguments);
|
||||
}
|
||||
|
||||
|
@ -1,15 +1,20 @@
|
||||
/* eslint-disable import/no-extraneous-dependencies */
|
||||
import babel from 'rollup-plugin-babel';
|
||||
import babelrc from 'babelrc-rollup'; // eslint-disable-line import/extensions
|
||||
import commonjs from 'rollup-plugin-commonjs';
|
||||
|
||||
const babelOpts = babelrc();
|
||||
babelOpts.runtimeHelpers = true;
|
||||
|
||||
export default {
|
||||
entry: './scripts/generate-custom-parser.js',
|
||||
plugins: [commonjs(), babel(babelOpts)],
|
||||
input: 'scripts/generate-custom-parser.js',
|
||||
plugins: [
|
||||
commonjs(),
|
||||
babel({
|
||||
externalHelpers: false,
|
||||
runtimeHelpers: true,
|
||||
}),
|
||||
],
|
||||
treeshake: true,
|
||||
output: {
|
||||
file: 'dist/generate-custom-parser.js',
|
||||
format: 'cjs',
|
||||
dest: 'dist/generate-custom-parser.js', // equivalent to --output
|
||||
sourceMap: true,
|
||||
},
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user