diff --git a/searx/engines/bing_news.py b/searx/engines/bing_news.py index 6c99c35dc..279f0d698 100644 --- a/searx/engines/bing_news.py +++ b/searx/engines/bing_news.py @@ -56,10 +56,14 @@ def response(resp): link = result.xpath('.//div[@class="newstitle"]/a')[0] url = link.attrib.get('href') title = ' '.join(link.xpath('.//text()')) - content = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()'))) - + contentXPath = result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_snip"]//text()') + if contentXPath != None: + content = escape(' '.join(contentXPath)) + # parse publishedDate - publishedDate = escape(' '.join(result.xpath('.//div[@class="sn_txt"]/div//span[@class="sn_ST"]//span[@class="sn_tm"]//text()'))) + publishedDateXPath = result.xpath('.//div[@class="sn_txt"]/div//span[contains(@class,"sn_ST")]//span[contains(@class,"sn_tm")]//text()') + if publishedDateXPath != None: + publishedDate = escape(' '.join(publishedDateXPath)) if re.match("^[0-9]+ minute(s|) ago$", publishedDate): timeNumbers = re.findall(r'\d+', publishedDate) @@ -74,9 +78,18 @@ def response(resp): publishedDate = datetime.now()\ - timedelta(hours=int(timeNumbers[0]))\ - timedelta(minutes=int(timeNumbers[1])) + elif re.match("^[0-9]+ day(s|) ago$", publishedDate): + timeNumbers = re.findall(r'\d+', publishedDate) + publishedDate = datetime.now()\ + - timedelta(days=int(timeNumbers[0])) else: - publishedDate = parser.parse(publishedDate) - + try: + # FIXME use params['language'] to parse either mm/dd or dd/mm + publishedDate = parser.parse(publishedDate, dayfirst=False) + except TypeError: + # FIXME + publishedDate = datetime.now() + # append result results.append({'url': url, 'title': title, diff --git a/searx/engines/dailymotion.py b/searx/engines/dailymotion.py index c1949cd70..75c2e5071 100644 --- a/searx/engines/dailymotion.py +++ b/searx/engines/dailymotion.py @@ -16,8 +16,8 @@ from lxml import html # engine dependent config categories = ['videos'] -locale = 'en_US' paging = True +language_support = True # search-url # see http://www.dailymotion.com/doc/api/obj-video.html @@ -26,6 +26,11 @@ search_url = 'https://api.dailymotion.com/videos?fields=title,description,durati # do search-request def request(query, params): + if params['language'] == 'all': + locale = 'en-US' + else: + locale = params['language'] + params['url'] = search_url.format( query=urlencode({'search': query, 'localization': locale}), pageno=params['pageno']) diff --git a/searx/engines/yahoo.py b/searx/engines/yahoo.py index 3d048186d..5e34a2b07 100644 --- a/searx/engines/yahoo.py +++ b/searx/engines/yahoo.py @@ -40,9 +40,11 @@ def parse_url(url_string): if endpos > -1: endpositions.append(endpos) - end = min(endpositions) - - return unquote(url_string[start:end]) + if start==0 or len(endpositions) == 0: + return url_string + else: + end = min(endpositions) + return unquote(url_string[start:end]) # do search-request