From 89beedd31f8a00a811787b651a506001833da0e1 Mon Sep 17 00:00:00 2001 From: Remita Amine Date: Tue, 11 Apr 2017 21:55:59 +0100 Subject: [PATCH] [youtube] improve m3u8 format extraction --- youtube_dl/extractor/youtube.py | 47 +++++++++++---------------------- 1 file changed, 15 insertions(+), 32 deletions(-) diff --git a/youtube_dl/extractor/youtube.py b/youtube_dl/extractor/youtube.py index 2506cd27c..9e2b9115c 100644 --- a/youtube_dl/extractor/youtube.py +++ b/youtube_dl/extractor/youtube.py @@ -1253,21 +1253,6 @@ def extract_id(cls, url): video_id = mobj.group(2) return video_id - def _extract_from_m3u8(self, manifest_url, video_id): - url_map = {} - - def _get_urls(_manifest): - lines = _manifest.split('\n') - urls = filter(lambda l: l and not l.startswith('#'), - lines) - return urls - manifest = self._download_webpage(manifest_url, video_id, 'Downloading formats manifest') - formats_urls = _get_urls(manifest) - for format_url in formats_urls: - itag = self._search_regex(r'itag/(\d+?)/', format_url, 'itag') - url_map[itag] = format_url - return url_map - def _extract_annotations(self, video_id): url = 'https://www.youtube.com/annotations_invideo?features=1&legacy=1&video_id=%s' % video_id return self._download_webpage(url, video_id, note='Searching for annotations.', errnote='Unable to download video annotations.') @@ -1573,19 +1558,6 @@ def _extract_count(count_name): if self._downloader.params.get('writeannotations', False): video_annotations = self._extract_annotations(video_id) - def _map_to_format_list(urlmap): - formats = [] - for itag, video_real_url in urlmap.items(): - dct = { - 'format_id': itag, - 'url': video_real_url, - 'player_url': player_url, - } - if itag in self._formats: - dct.update(self._formats[itag]) - formats.append(dct) - return formats - if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'): self.report_rtmp_download() formats = [{ @@ -1718,11 +1690,22 @@ def _map_to_format_list(urlmap): formats.append(dct) elif 
video_info.get('hlsvp'): manifest_url = video_info['hlsvp'][0] - url_map = self._extract_from_m3u8(manifest_url, video_id) - formats = _map_to_format_list(url_map) - # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming - for a_format in formats: + formats = [] + m3u8_formats = self._extract_m3u8_formats( + manifest_url, video_id, 'mp4', fatal=False) + for a_format in m3u8_formats: + itag = self._search_regex( + r'/itag/(\d+)/', a_format['url'], 'itag', default=None) + if itag: + a_format['format_id'] = itag + if itag in self._formats: + dct = self._formats[itag].copy() + dct.update(a_format) + a_format = dct + a_format['player_url'] = player_url + # Accept-Encoding header causes failures in live streams on Youtube and Youtube Gaming a_format.setdefault('http_headers', {})['Youtubedl-no-compression'] = 'True' + formats.append(a_format) else: unavailable_message = self._html_search_regex( r'(?s)<h1[^>]+id="unavailable-message"[^>]*>(.+?)</h1>',