Merge remote-tracking branch 'asciimoo/master'

pull/1/head
Thomas Pointhuber 10 years ago
commit 07f83cab22

@@ -6,7 +6,7 @@ from json import loads
categories = ['news'] categories = ['news']
url = 'https://ajax.googleapis.com/' url = 'https://ajax.googleapis.com/'
search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa search_url = url + 'ajax/services/search/news?v=2.0&start={offset}&rsz=large&safe=off&filter=off&{query}&hl={language}' # noqa
paging = True paging = True
language_support = True language_support = True

@@ -35,7 +35,7 @@ def response(resp):
 for result in dom.xpath(results_xpath):
 url_string = extract_url(result.xpath(url_xpath), search_url)
-start = url_string.find('/RU=')+4
+start = url_string.find('http', url_string.find('/RU=')+1)
 end = url_string.rfind('/RS')
 url = unquote(url_string[start:end])
 title = extract_text(result.xpath(title_xpath)[0])

@@ -35,7 +35,7 @@ def response(resp):
 for result in dom.xpath(results_xpath):
 url_string = extract_url(result.xpath(url_xpath), search_url)
-start = url_string.find('/RU=')+4
+start = url_string.find('http', url_string.find('/RU=')+1)
 end = url_string.rfind('/RS')
 url = unquote(url_string[start:end])
 title = extract_text(result.xpath(title_xpath)[0])

@@ -4,12 +4,15 @@ import csv
 from codecs import getincrementalencoder
 import cStringIO
 import re
+from random import choice
+ua_versions = ('26.0', '27.0', '28.0')
+ua_os = ('Windows NT 6.3; WOW64', 'X11; Linux x86_64; rv:26.0')
+ua = "Mozilla/5.0 ({os}) Gecko/20100101 Firefox/{version}"
 def gen_useragent():
 # TODO
-ua = "Mozilla/5.0 (X11; Linux x86_64; rv:26.0) Gecko/20100101 Firefox/26.0"
-return ua
+return ua.format(os=choice(ua_os), version=choice(ua_versions))
 def highlight_content(content, query):

Loading…
Cancel
Save