[tube8] fix extractor (fixes #5846)

This commit is contained in:
pulpe 2015-05-30 09:30:14 +02:00
parent fafec39d41
commit 6ebdfe43e4

View File

@@ -47,7 +47,7 @@ def _real_extract(self, url):
webpage = self._download_webpage(req, display_id) webpage = self._download_webpage(req, display_id)
flashvars = json.loads(self._html_search_regex( flashvars = json.loads(self._html_search_regex(
r'var flashvars\s*=\s*({.+?})', webpage, 'flashvars')) r'flashvars\s*=\s*({.+?})', webpage, 'flashvars'))
video_url = flashvars['video_url'] video_url = flashvars['video_url']
if flashvars.get('encrypted') is True: if flashvars.get('encrypted') is True:
@@ -58,19 +58,19 @@ def _real_extract(self, url):
thumbnail = flashvars.get('image_url') thumbnail = flashvars.get('image_url')
title = self._html_search_regex( title = self._html_search_regex(
r'videotitle\s*=\s*"([^"]+)', webpage, 'title') r'videoTitle\s*=\s*"([^"]+)', webpage, 'title')
description = self._html_search_regex( description = self._html_search_regex(
r'>Description:</strong>(.+?)<', webpage, 'description', fatal=False) r'>Description:</strong>\s*(.+?)\s*<', webpage, 'description', fatal=False)
uploader = self._html_search_regex( uploader = self._html_search_regex(
r'<strong class="video-username">(?:<a href="[^"]+">)?([^<]+)(?:</a>)?</strong>', r'<span class="username">\s*(.+?)\s*<',
webpage, 'uploader', fatal=False) webpage, 'uploader', fatal=False)
like_count = int_or_none(self._html_search_regex( like_count = int_or_none(self._html_search_regex(
r"rupVar\s*=\s*'(\d+)'", webpage, 'like count', fatal=False)) r'rupVar\s*=\s*"(\d+)"', webpage, 'like count', fatal=False))
dislike_count = int_or_none(self._html_search_regex( dislike_count = int_or_none(self._html_search_regex(
r"rdownVar\s*=\s*'(\d+)'", webpage, 'dislike count', fatal=False)) r'rdownVar\s*=\s*"(\d+)"', webpage, 'dislike count', fatal=False))
view_count = self._html_search_regex( view_count = self._html_search_regex(
r'<strong>Views: </strong>([\d,\.]+)</li>', webpage, 'view count', fatal=False) r'<strong>Views: </strong>([\d,\.]+)\s*</li>', webpage, 'view count', fatal=False)
if view_count: if view_count:
view_count = str_to_int(view_count) view_count = str_to_int(view_count)
comment_count = self._html_search_regex( comment_count = self._html_search_regex(