mercury-parser/src/extractors/generic/dek/extractor.js

// import {
//   DEK_META_TAGS,
//   DEK_SELECTORS,
//   DEK_URL_RES,
// } from './constants';

// import { cleanDek } from 'cleaners';

// import {
//   extractFromMeta,
//   extractFromSelectors,
// } from 'utils/dom';

// Currently there is only one selector for
// deks. We should simply return null here
// until we have a more robust generic option.
// Below is the original source for this, for reference.
const GenericDekExtractor = {
  // extract({ $, content, metaCache }) {
  extract() {
    return null;
  },
};

export default GenericDekExtractor;

// def extract_dek(self):
//     # First, check to see if we have a matching meta tag that we can make
//     # use of.
//     dek = self.extract_from_meta('dek', constants.DEK_META_TAGS)
//     if not dek:
//         # Second, look through our CSS/XPath selectors. This may return
//         # an HTML fragment.
//         dek = self.extract_from_selectors('dek',
//                                            constants.DEK_SELECTORS,
//                                            text_only=False)
//
//     if dek:
//         # Make sure our dek isn't in the first few thousand characters
//         # of the content, otherwise it's just the start of the article
//         # and not a true dek.
//         content = self.extract_content()
//         content_chunk = normalize_spaces(strip_tags(content[:2000]))
//         dek_chunk = normalize_spaces(dek[:100]) # Already has no tags.
//
//         # 80% or greater similarity means the dek was very similar to some
//         # of the starting content, so we skip it.
//         if fuzz.partial_ratio(content_chunk, dek_chunk) < 80:
//             return dek
//
//     return None