diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 2d8aa8b7c..547f5b171 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -29,6 +29,7 @@ from ..utils import ( clean_html, dict_get, datetime_from_str, + error_to_compat_str, ExtractorError, format_field, float_or_none, @@ -2628,6 +2629,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): 'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA', }, 'playlist_mincount': 21, + }, { + 'note': 'Playlist with "show unavailable videos" button', + 'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q', + 'info_dict': { + 'title': 'Uploads from Phim Siêu Nhân Nhật Bản', + 'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q', + 'uploader': 'Phim Siêu Nhân Nhật Bản', + 'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q', + }, + 'playlist_mincount': 1400, + 'expected_warnings': [ + 'YouTube said: INFO - Unavailable videos are hidden', + ] }, { # https://github.com/ytdl-org/youtube-dl/issues/21844 'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba', @@ -3293,8 +3307,49 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): if errors: raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) + def _reload_with_unavailable_videos(self, item_id, data, webpage): + """ + Get playlist with unavailable videos if the 'show unavailable videos' button exists. + """ + sidebar_renderer = try_get( + data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or [] + for item in sidebar_renderer: + if not isinstance(item, dict): + continue + renderer = item.get('playlistSidebarPrimaryInfoRenderer') + menu_renderer = try_get( + renderer, lambda x: x['menu']['menuRenderer']['items'], list) or [] + for menu_item in menu_renderer: + if not isinstance(menu_item, dict): + continue + nav_item_renderer = menu_item.get('menuNavigationItemRenderer') + text = try_get( + nav_item_renderer, lambda x: x['text']['simpleText'], compat_str) + if not text or text.lower() != 'show unavailable videos': + continue + browse_endpoint = try_get( + nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {} + browse_id = browse_endpoint.get('browseId') + params = browse_endpoint.get('params') + if not browse_id or not params: + return + ytcfg = self._extract_ytcfg(item_id, webpage) + headers = self._generate_api_headers( + ytcfg, account_syncid=self._extract_account_syncid(ytcfg), + identity_token=self._extract_identity_token(webpage, item_id=item_id), + visitor_data=try_get( + self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str)) + query = { + 'params': params, + 'browseId': browse_id + } + return self._extract_response( + item_id=item_id, headers=headers, query=query, + check_get_keys='contents', fatal=False, + note='Downloading API JSON with unavailable videos') + def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, - ytcfg=None, check_get_keys=None, ep='browse'): + ytcfg=None, check_get_keys=None, ep='browse', fatal=True): response = None last_error = None count = -1 @@ -3308,8 +3363,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): try: response = self._call_api( ep=ep, fatal=True, headers=headers, - video_id=item_id, - query=query, + video_id=item_id, query=query, context=self._extract_context(ytcfg), api_key=self._extract_api_key(ytcfg), note='%s%s' % (note, ' (retry #%d)' % count if count else '')) @@ -3320,7 +3374,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): last_error = 'HTTP Error %s' % e.cause.code if count < retries: continue - raise + if fatal: + raise + else: + self.report_warning(error_to_compat_str(e)) + return + else: # Youtube may send alerts if there was an issue with the continuation page self._extract_alerts(response, expected=False) @@ -3330,7 +3389,11 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): # See: https://github.com/ytdl-org/youtube-dl/issues/28194 last_error = 'Incomplete data received' if count >= retries: - self._downloader.report_error(last_error) + if fatal: + raise ExtractorError(last_error) + else: + self.report_warning(last_error) + return return response def _extract_webpage(self, url, item_id): @@ -3389,6 +3452,9 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor): webpage, data = self._extract_webpage(url, item_id) + # YouTube sometimes provides a button to reload playlist with unavailable videos. + data = self._reload_with_unavailable_videos(item_id, data, webpage) or data + tabs = try_get( data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list) if tabs: