mirror of
https://github.com/postlight/mercury-parser
synced 2024-11-17 03:25:31 +00:00
fix: better scoring for image extensions
This commit is contained in:
parent
11a2286659
commit
bc97156718
3
TODO.md
3
TODO.md
@ -1,8 +1,6 @@
|
|||||||
TODO:
|
TODO:
|
||||||
- Check that lead-image-url extractor isn't looking for end-of-string file extension matches (i.e., it could be ...foo.jpg?otherstuff)
|
|
||||||
- Resource (fetches page, validates it, cleans it, normalizes meta tags (!), converts lazy-loaded images, makes links absolute, etc)
|
- Resource (fetches page, validates it, cleans it, normalizes meta tags (!), converts lazy-loaded images, makes links absolute, etc)
|
||||||
- extractNextPageUrl
|
- extractNextPageUrl
|
||||||
- Try Closure webpack compiler
|
|
||||||
- Rename all cleaners from cleanThing to clean
|
- Rename all cleaners from cleanThing to clean
|
||||||
- Make sure weightNodes flag is being passed properly
|
- Make sure weightNodes flag is being passed properly
|
||||||
- Get better sense of when cheerio returns a raw node and when a cheerio object
|
- Get better sense of when cheerio returns a raw node and when a cheerio object
|
||||||
@ -12,6 +10,7 @@ TODO:
|
|||||||
- Separate constants into activity-specific folders (dom, scoring)
|
- Separate constants into activity-specific folders (dom, scoring)
|
||||||
|
|
||||||
DONE:
|
DONE:
|
||||||
|
x Check that lead-image-url extractor isn't looking for end-of-string file extension matches (i.e., it could be ...foo.jpg?otherstuff)
|
||||||
x extractLeadImageUrl
|
x extractLeadImageUrl
|
||||||
x extractDek
|
x extractDek
|
||||||
x extractDatePublished
|
x extractDatePublished
|
||||||
|
@ -49,5 +49,5 @@ export const NEGATIVE_LEAD_IMAGE_URL_HINTS = [
|
|||||||
]
|
]
|
||||||
export const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i')
|
export const NEGATIVE_LEAD_IMAGE_URL_HINTS_RE = new RegExp(NEGATIVE_LEAD_IMAGE_URL_HINTS.join('|'), 'i')
|
||||||
|
|
||||||
export const GIF_RE = /\gif$/i
|
export const GIF_RE = /\.gif(\?.*)?$/i
|
||||||
export const JPG_RE = /\jpe?g$/i
|
export const JPG_RE = /\.jpe?g(\?.*)?$/i
|
||||||
|
@ -42,6 +42,9 @@ describe('scoreImageUrlUrl(url)', () => {
|
|||||||
|
|
||||||
const url3 = 'http://example.com/foojpg/bar'
|
const url3 = 'http://example.com/foojpg/bar'
|
||||||
assert.equal(scoreImageUrl(url3), 0)
|
assert.equal(scoreImageUrl(url3), 0)
|
||||||
|
|
||||||
|
const url4 = 'http://example.com/foo.jpg?bar=baz'
|
||||||
|
assert.equal(scoreImageUrl(url4), 10)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user