feat: generic extractor for word count
Squashed commit of the following:
commit 0aba26ef9efba71a72c76fa351a9037e97fc1e9e Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 14 14:56:45 2016 -0400 fix: normalizeSpaces regex fix broke a test
commit 07d60c1c8c6599d6c94d92e5a70649c28d03d6ea Author: Adam Pash <adam.pash@gmail.com> Date: Wed Sep 14 14:52:41 2016 -0400 feat: generic extractor for word count
pull/3/head
parent
76df30e303
commit
daa9266182
@ -0,0 +1,14 @@
|
||||
import cheerio from 'cheerio'
|
||||
|
||||
import { normalizeSpaces } from 'utils/text'
|
||||
|
||||
// Generic extractor that reports how many words a piece of HTML content contains.
const GenericWordCountExtractor = {
  /**
   * Counts the whitespace-separated words in the first <div> of `content`.
   *
   * @param {Object} opts
   * @param {string} opts.content - HTML markup whose words should be counted.
   * @returns {number} Number of non-empty tokens in the text; 0 when the
   *   first <div> is missing or contains no text.
   */
  extract({ content }) {
    const $ = cheerio.load(content)

    // Only the text of the first <div> in the document is measured.
    // NOTE(review): presumably callers pass content wrapped in a single root
    // <div> — confirm against the content extractor's output.
    const text = normalizeSpaces($('div').first().text())

    // ''.split(...) yields [''], which would report one word for empty text;
    // leading/trailing whitespace produces empty tokens the same way.
    // Split on whitespace runs and keep only real tokens.
    return text.split(/\s+/).filter(word => word.length > 0).length
  },
}
|
||||
|
||||
export default GenericWordCountExtractor
|
@ -0,0 +1,21 @@
|
||||
import assert from 'assert'
|
||||
|
||||
import GenericWordCountExtractor from './extractor'
|
||||
|
||||
// Test suite for the generic word-count extractor.
describe('GenericWordCountExtractor', () => {
  describe('extract({ content })', () => {
    it('counts words', () => {
      // Fixture: four paragraphs of three words each, i.e. 12 words total,
      // all inside a single wrapping <div>.
      const content = `
        <div>
          <p>One two three.</p>
          <p>Four five six.</p>
          <p>Seven eight nine.</p>
          <p>Ten eleven twelve.</p>
        </div>
      `

      const wordCount = GenericWordCountExtractor.extract({ content })

      assert.equal(wordCount, 12)
    })
  })
})
|
Loading…
Reference in New Issue