Increased the number of images allowed compared to the number of paragraphs.

The new algorithm multiplies the number of paragraphs by two, in essence allowing up to twice as many images as paragraphs. This should be a better fit for blog posts with many images, such as recipes.
pull/44/head
anoras 11 years ago
parent d8595b7103
commit 5a4974496f

1
.gitignore vendored

@ -1,3 +1,4 @@
.idea
*.pyc
*.egg-info
build

@ -473,7 +473,7 @@ class Document:
#if el.tag == 'div' and counts["img"] >= 1:
# continue
if counts["p"] and counts["img"] > counts["p"]:
if counts["p"] and counts["img"] > counts["p"] * 2:
reason = "too many images (%s)" % counts["img"]
to_remove = True
elif counts["li"] > counts["p"] and tag != "ul" and tag != "ol":

@ -193,19 +193,19 @@ if (cnnPage.isHomepage) {
*/
//--></script>
</dl></a></li>
<li class="cnnItem1"><dl><script type="text/javascript">
var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
/*turning off 50/50 for now*/
/*if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
} else {*/
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
//}
</script>
<li class="cnnItem1"><dl><script type="text/javascript">
var min=1;
var max=2;
x = Math.floor(Math.random() * (max - min + 1)) + min;
/*turning off 50/50 for now*/
/*if(x/2 == 1) {
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/dropdown_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
} else {*/
document.write('<dt><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe2"><img src="http://i.cdn.turner.com/si/.element/img/4.1/global/cm/button_subscribe_si_red.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dt>');
document.write('<dd style="margin-left:-79px"><a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1002346.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe4"><img src="http://i.cdn.turner.com/si/2012_images/cm/bn_2osi16579_290x162_v1.png" alt="Subscribe to SI" title="Subscribe to SI"/></a></dd>');
//}
</script>
</dl></li>
<li class="cnnItem2"><dl><!--Default ROS
<a href="https://subscription.si.com/storefront/subscribe-to-sports-illustrated/link/1001406.html" target="_blank" rel="nofollow" id="cnn_cm_subscribe3"><img src="http://i.cdn.turner.com/si/2012_images/cm/si-btn3_170x30_sigift.png" alt="Give the Gift of SI" title="Give the Gift of SI"/></a>
@ -586,8 +586,8 @@ DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
<!-- end content -->
<!-- start contentFooter -->
<div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
<!-- start footerbox -->
<div class="cnnWideSL"><script type="text/javascript">adsonar_placementId=1488671;adsonar_pid=769769;adsonar_ps=-1;adsonar_zw=978;adsonar_zh=150;</script><script>cnnad_createSL();</script></div>
<!-- start footerbox -->
<div class="cnnFooterBox">
<div class="cnnHolder">
<div class="cnnRight">
@ -630,17 +630,17 @@ DIV.cnnTopnav LI.cnnFirst { padding-left:0px; }
</div>
</div>
</div>
<!-- end footerbox -->
<!-- start searchbar -->
<div class="cnnSearchFooter">
<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!--START OF PAGELINKS.JS-->
<!-- end footerbox -->
<!-- start searchbar -->
<div class="cnnSearchFooter">
<div class="cnnCenter"><form method="get" action="http://sportsillustrated.cnn.com/search/" name="footer_search"><input id="searchInputFooter" type="text" name="text" class="cnnLeft"/><input type="image" src="http://i.cdn.turner.com/si/.element/img/4.1/global/search.gif" alt="Search" title="Search" class="cnnRight"/></form></div>
</div>
<!-- end searchbar -->
<!--START OF PAGELINKS.JS-->
<script language="Javascript">// Post Processing code to update links with tracking references
var url = window.location.href.toString();
@ -692,9 +692,9 @@ if (cnnPage.isHomepage) {
}
/* Poll frame height issue */
if ($e('cnnPollFrame')) { $e('cnnPollFrame').setAttribute('height','169'); }
}</script>
<!--END OF PAGELINKS.JS-->
}</script>
<!--END OF PAGELINKS.JS-->
</div>
<div><!-- move tracking out of cnnpage -->
<!-- ADBP/JSMD -->
@ -753,7 +753,7 @@ _qoptions={
<!-- /TIIAD -->
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/pagelinks.js" type="text/javascript"></script>
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
<script src="http://i.cdn.turner.com/si/.e1d/js/4.1/global/subnav.js" type="text/javascript"></script>
<!-- end contentFooter -->

File diff suppressed because one or more lines are too long

@ -37,3 +37,9 @@ class TestArticleOnly(unittest.TestCase):
res = doc.summary(html_partial=True)
self.assertEqual('<div><div class="', res[0:17])
def test_too_many_images_sample_html_partial(self):
sample = load_sample('too-many-images.sample.html')
doc = Document(sample)
res = doc.summary(html_partial=True)
self.assertEqual('<div><div class="post-body', res[0:26])

Loading…
Cancel
Save