[crunchyroll] Improve extraction failsafeness (closes #17991)

This commit is contained in:
Sergey M․ 2018-10-28 22:12:54 +07:00
parent 5e733b066a
commit 08c7d3dade
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@ -3,6 +3,7 @@
import re import re
import json import json
import xml.etree.ElementTree as etree
import zlib import zlib
from hashlib import sha1 from hashlib import sha1
@ -398,7 +399,7 @@ def _get_subtitles(self, video_id, webpage):
'Downloading subtitles for ' + sub_name, data={ 'Downloading subtitles for ' + sub_name, data={
'subtitle_script_id': sub_id, 'subtitle_script_id': sub_id,
}) })
if sub_doc is None: if not isinstance(sub_doc, etree.Element):
continue continue
sid = sub_doc.get('id') sid = sub_doc.get('id')
iv = xpath_text(sub_doc, 'iv', 'subtitle iv') iv = xpath_text(sub_doc, 'iv', 'subtitle iv')
@ -515,7 +516,7 @@ def _real_extract(self, url):
'video_quality': stream_quality, 'video_quality': stream_quality,
'current_page': url, 'current_page': url,
}) })
if streamdata is not None: if isinstance(streamdata, etree.Element):
stream_info = streamdata.find('./{default}preload/stream_info') stream_info = streamdata.find('./{default}preload/stream_info')
if stream_info is not None: if stream_info is not None:
stream_infos.append(stream_info) stream_infos.append(stream_info)
@ -526,7 +527,7 @@ def _real_extract(self, url):
'video_format': stream_format, 'video_format': stream_format,
'video_encode_quality': stream_quality, 'video_encode_quality': stream_quality,
}) })
if stream_info is not None: if isinstance(stream_info, etree.Element):
stream_infos.append(stream_info) stream_infos.append(stream_info)
for stream_info in stream_infos: for stream_info in stream_infos:
video_encode_id = xpath_text(stream_info, './video_encode_id') video_encode_id = xpath_text(stream_info, './video_encode_id')
@ -598,10 +599,22 @@ def _real_extract(self, url):
series = self._html_search_regex( series = self._html_search_regex(
r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d', r'(?s)<h\d[^>]+\bid=["\']showmedia_about_episode_num[^>]+>(.+?)</h\d',
webpage, 'series', fatal=False) webpage, 'series', fatal=False)
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title') season = episode = episode_number = duration = thumbnail = None
episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
if isinstance(metadata, etree.Element):
season = xpath_text(metadata, 'series_title')
episode = xpath_text(metadata, 'episode_title')
episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
duration = float_or_none(media_metadata.get('duration'), 1000)
thumbnail = xpath_text(metadata, 'episode_image_url')
if not episode:
episode = media_metadata.get('title')
if not episode_number:
episode_number = int_or_none(media_metadata.get('episode_number'))
if not thumbnail:
thumbnail = media_metadata.get('thumbnail', {}).get('url')
season_number = int_or_none(self._search_regex( season_number = int_or_none(self._search_regex(
r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)', r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
@ -611,8 +624,8 @@ def _real_extract(self, url):
'id': video_id, 'id': video_id,
'title': video_title, 'title': video_title,
'description': video_description, 'description': video_description,
'duration': float_or_none(media_metadata.get('duration'), 1000), 'duration': duration,
'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'), 'thumbnail': thumbnail,
'uploader': video_uploader, 'uploader': video_uploader,
'upload_date': video_upload_date, 'upload_date': video_upload_date,
'series': series, 'series': series,