fix: better scoring for image extensions

pull/1/head
Adam Pash 8 years ago
parent 11a2286659
commit bc97156718

@ -1,8 +1,6 @@
TODO:
- Check that lead-image-url extractor isn't looking for end-of-string file extension matches (i.e., it could be ...foo.jpg?otherstuff)
- Resource (fetches page, validates it, cleans it, normalizes meta tags (!), converts lazy-loaded images, makes links absolute, etc)
- extractNextPageUrl
- Try Closure webpack compiler
- Rename all cleaners from cleanThing to clean
- Make sure weightNodes flag is being passed properly
- Get better sense of when cheerio returns a raw node and when a cheerio object
@ -12,6 +10,7 @@ TODO:
- Separate constants into activity-specific folders (dom, scoring)
DONE:
x Check that lead-image-url extractor isn't looking for end-of-string file extension matches (i.e., it could be ...foo.jpg?otherstuff)
x extractLeadImageUrl
x extractDek
x extractDatePublished

@ -49,5 +49,5 @@ export const NEGATIVE_LEAD_IMAGE_URL_HINTS = [
]
export const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i')
export const GIF_RE = /\gif$/i
export const JPG_RE = /\jpe?g$/i
export const GIF_RE = /\.gif(\?.*)?$/i
export const JPG_RE = /\.jpe?g(\?.*)?$/i

@ -42,6 +42,9 @@ describe('scoreImageUrlUrl(url)', () => {
const url3 = 'http://example.com/foojpg/bar'
assert.equal(scoreImageUrl(url3), 0)
const url4 = 'http://example.com/foo.jpg?bar=baz'
assert.equal(scoreImageUrl(url4), 10)
})
})

Loading…
Cancel
Save