You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
mercury-parser/dist/generate-custom-parser.js.map

1 line
237 KiB
Plaintext

feat: custom parser + generator + detailed readme instructions Squashed commit of the following: commit 02563daa67712c3679258ebebac60dfa9568dffb Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 30 12:25:44 2016 -0400 updated readme, added newyorker parser for readme guide commit 0ac613ef823efbffbf4cc9a89e5cb2489d1c4f6f Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 30 11:16:52 2016 -0400 feat: updated parser so the saved fixture absolutizes urls commit 85c7a2660b21f95c2205ca4a4378a7570687fed0 Author: Adam Pash <adam.pash@gmail.com> Date: Fri Sep 30 10:15:26 2016 -0400 refactor: attribute selectors must be an array for custom extractors commit f60f93d5d3d9b2f2d9ec6f28d27ae9dcf16ef01e Author: Adam Pash <adam.pash@gmail.com> Date: Thu Sep 29 10:13:14 2016 -0400 fix: whitelisting srcset and alt attributes commit e31cb1f4e8a9fc9c3d9b20ef9f40ca6c8d6ad51a Author: Adam Pash <adam.pash@gmail.com> Date: Thu Sep 29 09:44:21 2016 -0400 some housekeeping for coverage tests commit 39eafe420c776a1fe7f9fea634fb529a3ed75a71 Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 28 17:52:08 2016 -0400 fix: word count for multi-page articles commit b04e0066b52f190481b1b604c64e3d0b1226ff02 Author: Adam Pash <adam.pash@gmail.com> Date: Thu Sep 22 10:40:23 2016 -0400 major improvements to output commit 3f3a880b63b47fe21953485da670b6e291ac60e5 Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 21 17:27:53 2016 -0400 updated test command commit 14503426557a870755453572221d95c92cff4bd2 Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 21 16:00:30 2016 -0400 shortened generator command commit 5ebd8343cd4b87b3f5787dab665bff0de96846e1 Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 21 15:59:14 2016 -0400 feat: can disable fallback to generic parser (this will be useful for testing custom parsers)
8 years ago
{"version":3,"file":null,"sources":["mercury.js","../src/utils/dom/constants.js","../src/utils/dom/strip-junk-tags.js","../src/extractors/generic/content/scoring/constants.js","../src/utils/dom/make-links-absolute.js","../scripts/templates/insert-values.js","../scripts/templates/index.js","../scripts/templates/custom-extractor.js","../scripts/templates/custom-extractor-test.js","../scripts/generate-custom-parser.js"],"sourcesContent":["'use strict';\n\nfunction _interopDefault (ex) { return (ex && (typeof ex === 'object') && 'default' in ex) ? ex['default'] : ex; }\n\nvar _regeneratorRuntime = _interopDefault(require('babel-runtime/regenerator'));\nvar _extends = _interopDefault(require('babel-runtime/helpers/extends'));\nvar _asyncToGenerator = _interopDefault(require('babel-runtime/helpers/asyncToGenerator'));\nvar URL = _interopDefault(require('url'));\nvar babelPolyfill = require('babel-polyfill');\nvar cheerio = _interopDefault(require('cheerio'));\nvar _Promise = _interopDefault(require('babel-runtime/core-js/promise'));\nvar request = _interopDefault(require('request'));\nvar _Reflect$ownKeys = _interopDefault(require('babel-runtime/core-js/reflect/own-keys'));\nvar stringDirection = _interopDefault(require('string-direction'));\nvar _getIterator = _interopDefault(require('babel-runtime/core-js/get-iterator'));\nvar _defineProperty = _interopDefault(require('babel-runtime/helpers/defineProperty'));\nvar _slicedToArray = _interopDefault(require('babel-runtime/helpers/slicedToArray'));\nvar _typeof = _interopDefault(require('babel-runtime/helpers/typeof'));\nvar validUrl = _interopDefault(require('valid-url'));\nvar moment = _interopDefault(require('moment'));\nvar wuzzy = _interopDefault(require('wuzzy'));\nvar difflib = _interopDefault(require('difflib'));\nvar _Array$from = _interopDefault(require('babel-runtime/core-js/array/from'));\nvar ellipsize = _interopDefault(require('ellipsize'));\n\nvar _marked = [range].map(_regeneratorRuntime.mark);\n\nfunction range() {\n var start = arguments.length <= 0 || arguments[0] === undefined ? 1 : arguments[0];\n var end = arguments.length <= 1 || arguments[1] === undefined ? 1 : arguments[1];\n return _regeneratorRuntime.wrap(function range$(_context) {\n while (1) {\n switch (_context.prev = _context.next) {\n case 0:\n if (!(start <= end)) {\n _context.next = 5;\n break;\n }\n\n _context.next = 3;\n return start += 1;\n\n case 3:\n _context.next = 0;\n break;\n\n case 5:\n case \"end\":\n return _context.stop();\n }\n }\n }, _marked[0], this);\n}\n\n// extremely simple url validation as a first step\nfunction validateUrl(_ref) {\n var hostname = _ref.hostname;\n\n // If this isn't a valid url, return an error message\n return !!hostname;\n}\n\nvar Errors = {\n badUrl: {\n error: true,\n messages: 'The url parameter passed does not look like a valid URL. Please check your data and try again.'\n }\n};\n\nvar REQUEST_HEADERS = {\n 'User-Agent': 'Readability - http://readability.com/about/'\n};\n\n// The number of milliseconds to attempt to fetch a resource before timing out.\nvar FETCH_TIMEOUT = 10000;\n\n// Content types that we do not extract content from\nvar BAD_CONTENT_TYPES = ['audio/mpeg', 'image/gif', 'image/jpeg', 'image/jpg'];\n\nvar BAD_CONTENT_TYPES_RE = new RegExp('^(' + BAD_CONTENT_TYPES.join('|') + ')$', 'i');\n\n// Use this setting as the maximum size an article can be\n// for us to attempt parsing. Defaults to 5 MB.\nvar MAX_CONTENT_LENGTH = 5242880;\n\nfunction get(options) {\n return new _Promise(function (resolve, reject) {\n request(options, function (err, response, body) {\n if (err) {\n reject(err);\n } else {\n resolve({ body: body, response: response });\n }\n });\n });\n}\n\n// Evaluate a response to ensure it's something we should be keeping.\n// This does not validate in the sense of a response being 200 level or\n// not. Validation here means that we haven't found r