[fix] indexing ++ url extraction

This commit is contained in:
Adam Tauber 2014-03-21 16:36:13 +01:00
parent 3854703d95
commit 01c2eeb8ff

View File

@@ -47,8 +47,11 @@ def response(resp):
     dom = html.fromstring(resp.text)
     for result in dom.xpath(results_xpath):
-        url = parse_url(extract_url(result.xpath(url_xpath), search_url))
-        title = extract_text(result.xpath(title_xpath)[0])
+        try:
+            url = parse_url(extract_url(result.xpath(url_xpath), search_url))
+            title = extract_text(result.xpath(title_xpath)[0])
+        except:
+            continue
         content = extract_text(result.xpath(content_xpath)[0])
         results.append({'url': url, 'title': title, 'content': content})