From 705e7c2005dfe67a905e18736c9f6345ee9d386b Mon Sep 17 00:00:00 2001 From: pukkandan Date: Wed, 6 Oct 2021 10:53:22 +0530 Subject: [PATCH] [Hidive] Fix duplicate and incorrect formats --- yt_dlp/extractor/hidive.py | 85 +++++++++++++++----------------------- 1 file changed, 34 insertions(+), 51 deletions(-) diff --git a/yt_dlp/extractor/hidive.py b/yt_dlp/extractor/hidive.py index 90457b77e..909d1fbc1 100644 --- a/yt_dlp/extractor/hidive.py +++ b/yt_dlp/extractor/hidive.py @@ -1,8 +1,6 @@ # coding: utf-8 from __future__ import unicode_literals -import re - from .common import InfoExtractor from ..utils import ( ExtractorError, @@ -14,7 +12,7 @@ from ..utils import ( class HiDiveIE(InfoExtractor): - _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P[^/]+)/(?P<key>[^/?#&]+)' + _VALID_URL = r'https?://(?:www\.)?hidive\.com/stream/(?P<id>(?P<title>[^/]+)/(?P<key>[^/?#&]+))' # Using X-Forwarded-For results in 403 HTTP error for HLS fragments, # so disabling geo bypass completely _GEO_BYPASS = False @@ -55,68 +53,53 @@ class HiDiveIE(InfoExtractor): self._LOGIN_URL, None, 'Logging in', data=urlencode_postdata(data)) def _real_extract(self, url): - mobj = self._match_valid_url(url) - title, key = mobj.group('title', 'key') - video_id = '%s/%s' % (title, key) - webpage = self._download_webpage(url, video_id, fatal=False) - data_videos = re.findall(r'data-video=\"([^\"]+)\"\s?data-captions=\"([^\"]+)\"', webpage) - formats = [] - subtitles = {} - for data_video in data_videos: - _, _, _, version, audio, _, extra = data_video[0].split('_') - caption = data_video[1] - - settings = self._download_json( - 'https://www.hidive.com/play/settings', video_id, - data=urlencode_postdata({ - 'Title': title, - 'Key': key, - 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', - 'Version': version, - 'Audio': audio, - 'Captions': caption, - 'Extra': extra, - })) + video_id, title, key = self._match_valid_url(url).group('id', 'title', 'key') + settings = self._download_json( + 'https://www.hidive.com/play/settings', video_id, + data=urlencode_postdata({ + 'Title': title, + 'Key': key, + 'PlayerId': 'f4f895ce1ca713ba263b91caeb1daa2d08904783', + })) - restriction = settings.get('restrictionReason') - if restriction == 'RegionRestricted': - self.raise_geo_restricted() + restriction = settings.get('restrictionReason') + if restriction == 'RegionRestricted': + self.raise_geo_restricted() + if restriction and restriction != 'None': + raise ExtractorError( + '%s said: %s' % (self.IE_NAME, restriction), expected=True) - if restriction and restriction != 'None': - raise ExtractorError( - '%s said: %s' % (self.IE_NAME, restriction), expected=True) - - for rendition_id, rendition in settings['renditions'].items(): - m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) - if not m3u8_url: - continue + formats, subtitles, urls = [], {}, {None} + for rendition_id, rendition in settings['renditions'].items(): + audio, version, extra = rendition_id.split('_') + m3u8_url = url_or_none(try_get(rendition, lambda x: x['bitrates']['hls'])) + if m3u8_url not in urls: + urls.add(m3u8_url) frmt = self._extract_m3u8_formats( - m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', - m3u8_id='%s-%s-%s-%s' % (version, audio, extra, caption), fatal=False) + m3u8_url, video_id, 'mp4', entry_protocol='m3u8_native', m3u8_id=rendition_id, fatal=False) for f in frmt: f['language'] = audio + f['format_note'] = f'{version}, {extra}' formats.extend(frmt) - for cc_file in rendition.get('ccFiles', []): - cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) - # name is used since we cant distinguish subs with same language code - cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) - if cc_url and cc_lang: - subtitles.setdefault(cc_lang, []).append({'url': cc_url}) + for cc_file in rendition.get('ccFiles', []): + cc_url = url_or_none(try_get(cc_file, lambda x: x[2])) + # name is used since we cant distinguish subs with same language code + cc_lang = try_get(cc_file, (lambda x: x[1].replace(' ', '-').lower(), lambda x: x[0]), str) + if cc_url not in urls and cc_lang: + urls.add(cc_url) + subtitles.setdefault(cc_lang, []).append({'url': cc_url}) self._sort_formats(formats) - season_number = int_or_none(self._search_regex( - r's(\d+)', key, 'season number', default=None)) - episode_number = int_or_none(self._search_regex( - r'e(\d+)', key, 'episode number', default=None)) - return { 'id': video_id, 'title': video_id, 'subtitles': subtitles, 'formats': formats, 'series': title, - 'season_number': season_number, - 'episode_number': episode_number, + 'season_number': int_or_none( + self._search_regex(r's(\d+)', key, 'season number', default=None)), + 'episode_number': int_or_none( + self._search_regex(r'e(\d+)', key, 'episode number', default=None)), 'http_headers': {'Referer': url} }