From 7f1e2ba211a1bd8f2068aadddba5173440d28e36 Mon Sep 17 00:00:00 2001 From: Dalf Date: Sun, 7 Sep 2014 17:14:42 +0200 Subject: [PATCH 1/3] [enh] dailymotion engine: add language support --- searx/engines/dailymotion.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index c1949cd70..75c2e5071 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -16,8 +16,8 @@ from lxml import html # engine dependent config categories = ['videos'] -locale = 'en_US' paging = True +language_support = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html @@ -26,6 +26,11 @@ search_url = 'https://api.dailymotion.com/videos?fields=title,description,durati # do search-request def request(query, params): + if params['language'] == 'all': + locale = 'en-US' + else: + locale = params['language'] + params['url'] = search_url.format( query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']) From a92e3ba4464853eeb44ed77661b763e75f619e5e Mon Sep 17 00:00:00 2001 From: Dalf Date: Sun, 7 Sep 2014 18:10:05 +0200 Subject: [PATCH 2/3] [fix] bing_news engine: fix published date parsing --- searx/engines/bing_news.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 6c99c35dc..279f0d698 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -56,10 +56,14 @@ def response(resp): link = result.xpath('.//div[@class="newstitle"]/a')[0] url = link.attrib.get('href') title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()'))) - + contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()') + if contentXPath != None: + content = escape(' '.join(contentXPath)) + # parse publishedDate - publishedDate = escape(' 
'.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()'))) + publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()') + if publishedDateXPath != None: + publishedDate = escape(' '.join(publishedDateXPath)) if re.match("^[0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) @@ -74,9 +78,18 @@ def response(resp): publishedDate = datetime.now()\ - timedelta(hours=int(timeNumbers[0]))\ - timedelta(minutes=int(timeNumbers[1])) + elif re.match("^[0-9]+ day(s|) ago$", publishedDate): + timeNumbers = re.findall(r'\d+', publishedDate) + publishedDate = datetime.now()\ + - timedelta(days=int(timeNumbers[0])) else: - publishedDate = parser.parse(publishedDate) - + try: + # FIXME use params['language'] to parse either mm/dd or dd/mm + publishedDate = parser.parse(publishedDate, dayfirst=False) + except TypeError: + # FIXME + publishedDate = datetime.now() + # append result results.append({'url': url, 'title': title, From a4ffeddce1bc56b0faa548e0485ccd6374c4e9d1 Mon Sep 17 00:00:00 2001 From: Dalf Date: Sun, 7 Sep 2014 18:42:56 +0200 Subject: [PATCH 3/3] [fix] yahoo engines: parse_url doesn't throw an exception on non-tracking URLs --- searx/engines/yahoo.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 3d048186d..5e34a2b07 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -40,9 +40,11 @@ def parse_url(url_string): if endpos > -1: endpositions.append(endpos) - end = min(endpositions) - - return unquote(url_string[start:end]) + if start==0 or len(endpositions) == 0: + return url_string + else: + end = min(endpositions) + return unquote(url_string[start:end]) # do search-request