Add a penalty for double quote chars in paragraphs.

- Double quotes are far more common in random commented-out code and
  proprietary metadata, which keep slipping past the filter as actual content.
- Downgraded the score value of commas for the same reason.
- Prep for 0.1.10 release with these changes.

Add credits and tweak the " and , scoring

Update version and update the scoring code
pull/14/head
Nathan Nifong 12 years ago committed by Richard Harding
parent 60da675da5
commit 920094c81a

@ -0,0 +1,2 @@
Rick Harding
nhnifong

@ -6,6 +6,14 @@
News
====
0.1.10
-------
* Release date: Sept 13th 2012
* Updated scoring: reduced the bonus for , characters and added a penalty for " characters.
0.1.9
------

@ -6,7 +6,7 @@ README = open(os.path.join(here, 'README.rst')).read()
NEWS = open(os.path.join(here, 'NEWS.txt')).read()
version = '0.1.9'
version = '0.1.10'
install_requires = [
# List your project dependencies here.
# For more details, see:

@ -1,3 +1,3 @@
VERSION = '0.1.9'
VERSION = '0.1.10'
import client
from scripts import newtest

@ -140,21 +140,29 @@ def score_candidates(nodes):
# Add a point for the paragraph itself as a base.
content_score += 1
# Add points for any commas within this paragraph
content_score += innertext.count(',') if innertext else 0
LNODE.log(node, 1, "Bonus points for ,: " + str(innertext.count(',')))
# For every 100 characters in this paragraph, add another point. Up to
# 3 points.
length_points = len(innertext) / 100 if innertext else 0
if length_points > 3:
content_score += 3
else:
content_score += length_points
LNODE.log(
node, 1,
"Length/content points: {0} : {1}".format(length_points,
content_score))
if innertext:
# Add 0.25 points for any commas within this paragraph
content_score += innertext.count(',') * 0.25
LNODE.log(node, 1,
"Bonus points for ,: " + str(innertext.count(',')))
# Subtract 0.5 points for each double quote within this paragraph
content_score += innertext.count('"') * (-0.5)
LNODE.log(node, 1,
'Penalty points for ": ' + str(innertext.count('"')))
# For every 100 characters in this paragraph, add another point.
# Up to 3 points.
length_points = len(innertext) / 100
if length_points > 3:
content_score += 3
else:
content_score += length_points
LNODE.log(
node, 1,
"Length/content points: {0} : {1}".format(length_points,
content_score))
# Add the score to the parent.
LNODE.log(node, 1, "From this current node.")

Loading…
Cancel
Save