[FIX] google videos thumbnails

dependabot/pip/master/sphinx-6.1.3
Venca24 6 years ago
parent cee15f0375
commit cf26aba93b

@@ -7,15 +7,16 @@
@using-api no @using-api no
@results HTML @results HTML
@stable no @stable no
@parse url, title, content @parse url, title, content, thumbnail
""" """
from datetime import date, timedelta from datetime import date, timedelta
from json import loads from json import loads
from lxml import html from lxml import html
from searx.engines import logger
from searx.engines.xpath import extract_text from searx.engines.xpath import extract_text
from searx.url_utils import urlencode from searx.url_utils import urlencode
import re
# engine dependent config # engine dependent config
categories = ['videos'] categories = ['videos']
@@ -73,11 +74,24 @@ def response(resp):
url = result.xpath('.//div[@class="r"]/a/@href')[0] url = result.xpath('.//div[@class="r"]/a/@href')[0]
content = extract_text(result.xpath('.//span[@class="st"]')) content = extract_text(result.xpath('.//span[@class="st"]'))
# get thumbnails
script = str(dom.xpath('//script[contains(., "_setImagesSrc")]')[0].text)
id = result.xpath('.//div[@class="s"]//img/@id')[0]
thumbnails_data = re.findall('s=\'(.*?)(?:\\\\[a-z,1-9,\\\\]+\'|\')\;var ii=\[(?:|[\'vidthumb\d+\',]+)\'' + id,
script)
logger.debug('google video engine: ' + id + ' matched ' + str(len(thumbnails_data)) + ' times (thumbnail)')
tmp = []
if len(thumbnails_data) != 0:
tmp = re.findall('(data:image/jpeg;base64,[a-z,A-Z,0-9,/,\+]+)', thumbnails_data[0])
thumbnail = ''
if len(tmp) != 0:
thumbnail = tmp[-1]
# append result # append result
results.append({'url': url, results.append({'url': url,
'title': title, 'title': title,
'content': content, 'content': content,
'thumbnail': '', 'thumbnail': thumbnail,
'template': 'videos.html'}) 'template': 'videos.html'})
return results return results

Loading…
Cancel
Save