[fix] skip non-complete google news results

dependabot/pip/master/sphinx-6.1.3
Adam Tauber 8 years ago
parent 94327d67fc
commit 108392f8da

@@ -66,11 +66,14 @@ def response(resp):
# parse results # parse results
for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'): for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
r = { try:
'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0], r = {
'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')), 'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
'content': ''.join(result.xpath('.//div[@class="st"]//text()')), 'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
} 'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
}
except:
continue
imgs = result.xpath('.//img/@src') imgs = result.xpath('.//img/@src')
if len(imgs) and not imgs[0].startswith('data'): if len(imgs) and not imgs[0].startswith('data'):

Loading…
Cancel
Save